In [1]:
from io import StringIO
import requests
import seaborn
import json
import pandas as pd
import re
from collections import defaultdict
import timeit
import matplotlib.pyplot
import numpy

%matplotlib inline

In [2]:
df_data_2 = pd.read_csv('h1b_kaggle.csv')

In [3]:
df_data_2.head(10)


Out[3]:
Unnamed: 0 CASE_STATUS EMPLOYER_NAME SOC_NAME JOB_TITLE FULL_TIME_POSITION PREVAILING_WAGE YEAR WORKSITE lon lat
0 1 CERTIFIED-WITHDRAWN UNIVERSITY OF MICHIGAN BIOCHEMISTS AND BIOPHYSICISTS POSTDOCTORAL RESEARCH FELLOW N 36067.0 2016.0 ANN ARBOR, MICHIGAN -83.743038 42.280826
1 2 CERTIFIED-WITHDRAWN GOODMAN NETWORKS, INC. CHIEF EXECUTIVES CHIEF OPERATING OFFICER Y 242674.0 2016.0 PLANO, TEXAS -96.698886 33.019843
2 3 CERTIFIED-WITHDRAWN PORTS AMERICA GROUP, INC. CHIEF EXECUTIVES CHIEF PROCESS OFFICER Y 193066.0 2016.0 JERSEY CITY, NEW JERSEY -74.077642 40.728158
3 4 CERTIFIED-WITHDRAWN GATES CORPORATION, A WHOLLY-OWNED SUBSIDIARY O... CHIEF EXECUTIVES REGIONAL PRESIDEN, AMERICAS Y 220314.0 2016.0 DENVER, COLORADO -104.990251 39.739236
4 5 WITHDRAWN PEABODY INVESTMENTS CORP. CHIEF EXECUTIVES PRESIDENT MONGOLIA AND INDIA Y 157518.4 2016.0 ST. LOUIS, MISSOURI -90.199404 38.627003
5 6 CERTIFIED-WITHDRAWN BURGER KING CORPORATION CHIEF EXECUTIVES EXECUTIVE V P, GLOBAL DEVELOPMENT AND PRESIDEN... Y 225000.0 2016.0 MIAMI, FLORIDA -80.191790 25.761680
6 7 CERTIFIED-WITHDRAWN BT AND MK ENERGY AND COMMODITIES CHIEF EXECUTIVES CHIEF OPERATING OFFICER Y 91021.0 2016.0 HOUSTON, TEXAS -95.369803 29.760427
7 8 CERTIFIED-WITHDRAWN GLOBO MOBILE TECHNOLOGIES, INC. CHIEF EXECUTIVES CHIEF OPERATIONS OFFICER Y 150000.0 2016.0 SAN JOSE, CALIFORNIA -121.886329 37.338208
8 9 CERTIFIED-WITHDRAWN ESI COMPANIES INC. CHIEF EXECUTIVES PRESIDENT Y 127546.0 2016.0 MEMPHIS, TEXAS NaN NaN
9 10 WITHDRAWN LESSARD INTERNATIONAL LLC CHIEF EXECUTIVES PRESIDENT Y 154648.0 2016.0 VIENNA, VIRGINIA -77.265260 38.901222

In [4]:
df_data_2[['CASE_STATUS', 'SOC_NAME']]
#use this to make a chart with two indexes


Out[4]:
CASE_STATUS SOC_NAME
0 CERTIFIED-WITHDRAWN BIOCHEMISTS AND BIOPHYSICISTS
1 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
2 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
3 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
4 WITHDRAWN CHIEF EXECUTIVES
5 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
6 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
7 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
8 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
9 WITHDRAWN CHIEF EXECUTIVES
10 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
11 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
12 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
13 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
14 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
15 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
16 CERTIFIED-WITHDRAWN FINANCIAL MANAGERS
17 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
18 CERTIFIED CHIEF EXECUTIVES
19 CERTIFIED CHIEF EXECUTIVES
20 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
21 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
22 CERTIFIED CHIEF EXECUTIVES
23 CERTIFIED CHIEF EXECUTIVES
24 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
25 CERTIFIED CHIEF EXECUTIVES
26 CERTIFIED-WITHDRAWN CHIEF EXECUTIVES
27 CERTIFIED CHIEF EXECUTIVES
28 CERTIFIED CHIEF EXECUTIVES
29 CERTIFIED CHIEF EXECUTIVES
... ... ...
3002428 WITHDRAWN Physical Therapists
3002429 WITHDRAWN Physical Therapists
3002430 WITHDRAWN Computer Systems Analysts
3002431 WITHDRAWN Computer Support Specialists
3002432 WITHDRAWN Computer Systems Analysts
3002433 WITHDRAWN Biochemists and Biophysicists
3002434 WITHDRAWN Computer Systems Analysts
3002435 WITHDRAWN Computer Programmers
3002436 WITHDRAWN Computer Programmers
3002437 WITHDRAWN Health Specialties Teachers, Postsecondary
3002438 WITHDRAWN Database Administrators
3002439 WITHDRAWN Biological Scientists, All Other
3002440 WITHDRAWN Software Developers, Applications
3002441 WITHDRAWN Materials Scientists
3002442 WITHDRAWN Market Research Analysts and Marketing Special...
3002443 WITHDRAWN Commercial and Industrial Designers
3002444 WITHDRAWN Commercial and Industrial Designers
3002445 NaN NaN
3002446 NaN NaN
3002447 NaN NaN
3002448 NaN NaN
3002449 NaN NaN
3002450 NaN NaN
3002451 NaN NaN
3002452 NaN NaN
3002453 NaN NaN
3002454 NaN NaN
3002455 NaN NaN
3002456 NaN NaN
3002457 NaN NaN

3002458 rows × 2 columns


In [5]:
df_data_2['SOC_NAME'].value_counts()


Out[5]:
Computer Systems Analysts                                  291170
Computer Programmers                                       226574
SOFTWARE DEVELOPERS, APPLICATIONS                          221783
COMPUTER SYSTEMS ANALYSTS                                  215353
Software Developers, Applications                          192933
COMPUTER PROGRAMMERS                                       171972
COMPUTER OCCUPATIONS, ALL OTHER                            108555
Computer Occupations, All Other                             69315
Software Developers, Systems Software                       44500
SOFTWARE DEVELOPERS, SYSTEMS SOFTWARE                       39356
Management Analysts                                         38756
Financial Analysts                                          34141
Computer Software Engineers, Applications                   33325
Accountants and Auditors                                    30708
MANAGEMENT ANALYSTS                                         28117
Mechanical Engineers                                        26219
Computer Occupations, All Other*                            25628
Electronics Engineers, Except Computer                      23323
Electrical Engineers                                        22162
Market Research Analysts and Marketing Specialists          22158
ACCOUNTANTS AND AUDITORS                                    22114
Physicians and Surgeons, All Other                          22010
Database Administrators                                     21937
NETWORK AND COMPUTER SYSTEMS ADMINISTRATORS                 21642
FINANCIAL ANALYSTS                                          19399
Operations Research Analysts                                19171
Network and Computer Systems Administrators                 18445
MECHANICAL ENGINEERS                                        18017
COMPUTER SYSTEMS ANALYST                                    17426
DATABASE ADMINISTRATORS                                     17045
                                                            ...  
OCCUPATIONAL THERAPY ASSISTANTS                                 1
Service Unit Operators, Oil, Gas, and Mining                    1
Dietitians and Nutritionists, R&D (ACWIA Only)                  1
MEDICAL TECHNOLOGISTS                                           1
SYSTEMS ENGINEERS/ARCHITECTS                                    1
ENGLISH LANGUAGE &  LIT TEACHERS, POST SECONDARY                1
SECURITIES, AND COMMODITIES TRADERS                             1
FOREIGN LANGUAGE LITERATURE TEACHERS, POSTSECOND                1
FOREIGN LANGUAGE AND LITERATURE TEACHERS, POST SEC              1
SOFTWARE DEVELOPERS,  APPLICATION                               1
SECURITIES COMMODITIES FINANCIAL SERV SALES AGENTS              1
MEDICAL SCIENTISTS EXCEPT EPIDIMIOLOGISTS                       1
HYDROLOGISITS                                                   1
Earth Drillers, Except Oil and Gas                              1
Hazardous Materials Removal Workers                             1
ELECTRONICS ENGINEERS; EXCEPT COMPUTER                          1
FOREIGN LANGUAGE AND LITERATURE TEACHERS, POSTSECONDARY         1
Funeral Attendants                                              1
TUTORS                                                          1
27-3031                                                         1
MARKET RESEARCH ANALYSTS AND MARKETING  SPECIALIST              1
HEALTH SPECIALITY TEACHERS, POSTSECONDARY                       1
ART, DRAMA, AND MUSIC TEACHER, POSTSECONDARY                    1
ENVIRONMENTAL SCIENTIST                                         1
TRAINING AND DEVELOPMENT MANAGER                                1
SALES AND RELATED WORKERS, ALL OTHER*                           1
MEDICAL SCIENTIST, EXCEPT EPIDEMIOLOGIST                        1
COMPUTER PROGRAMMGER                                            1
OPERATIONS SYSTEMS ANALYST                                      1
BUSINESS OPERATIONS SPECIALIST, ALL OTHERS                      1
Name: SOC_NAME, dtype: int64

In [8]:
df_data_2['SOC_NAME'].value_counts().count()


Out[8]:
2132

In [9]:
cleandata1=df_data_2.copy()

In [10]:
cleandata1['SOC_NAME']=df_data_2['SOC_NAME'].str.lower()
cleandata1['EMPLOYER_NAME']=df_data_2['EMPLOYER_NAME'].str.lower()
cleandata1['JOB_TITLE']=df_data_2['JOB_TITLE'].str.lower()

In [13]:
cleandata1.to_csv('h1b_kaggle_lowwered.csv', sep='\t')

Here, we transform some strings to lowercase. This is because there are duplicate entries in the dataset which in both upper and lower. This increases redundancy


In [9]:
cleandata1['SOC_NAME'].value_counts()


Out[9]:
computer systems analysts                                             506523
software developers, applications                                     414716
computer programmers                                                  398546
computer occupations, all other                                       177870
software developers, systems software                                  83856
management analysts                                                    66873
financial analysts                                                     53540
accountants and auditors                                               52822
mechanical engineers                                                   44236
network and computer systems administrators                            40087
database administrators                                                38982
market research analysts and marketing specialists                     37737
electronics engineers, except computer                                 36574
operations research analysts                                           34260
electrical engineers                                                   34108
physicians and surgeons, all other                                     33526
computer software engineers, applications                              33387
computer and information systems managers                              27536
computer occupations, all other*                                       26254
medical scientists, except epidemiologists                             26159
physical therapists                                                    21994
biochemists and biophysicists                                          21245
industrial engineers                                                   19370
computer systems analyst                                               17426
statisticians                                                          17101
biological scientists, all other                                       16367
marketing managers                                                     16310
civil engineers                                                        15970
web developers                                                         15000
internists, general                                                    13367
                                                                       ...  
urologists                                                                 1
financial analysis                                                         1
commerical and industrial designers                                        1
17-2051                                                                    1
training and development manager                                           1
job printers                                                               1
occupational health and safety specialists and tec                         1
finance managers                                                           1
electonics engineers, except computer                                      1
industrial designers                                                       1
software developers, applications,  non r&d                                1
property real estate & community association mgrs                          1
business systems analysts                                                  1
secondary school teachers, except special and vocational education         1
chemist                                                                    1
health speciality teacher                                                  1
pediatrician                                                               1
computer systems engineers/arquitects                                      1
medical and clincial laboratory technologists                              1
computer systems engineers/architect                                       1
lodging manager                                                            1
new accounts clerks                                                        1
designer, all other                                                        1
atmospheric, earth, marine, & space sciences teach                         1
network & computer systems administrator                                   1
foreign language and literature teachers, post sec                         1
computer occuptations, all other                                           1
engineering teachers postsecondary                                         1
cashiers                                                                   1
business intelligence anaylsts                                             1
Name: SOC_NAME, dtype: int64

There is still alot of redundancy we can exploit. We can generalize these strings remove specalized strings into more general form. For example 'software engineer, senior' needs to be reduced to 'software engineer'. This also applies to the other columns with string attributes.


In [10]:
cleandata1['SOC_NAME'].value_counts().count()


Out[10]:
1585

We have reduced the number of names down to 1585


In [11]:
reducedf = pd.DataFrame({'SOC_NAME': cleandata1['SOC_NAME'].value_counts().index, 'Count':cleandata1['SOC_NAME'].value_counts().values})
#df['Counts'] = df.groupby(['SOC_NAME'])['Count'].transform('count') #I don't remember what I was trying to do here.
#df = df.set_index(['SOC_NAME'])
reducedf


Out[11]:
Count SOC_NAME
0 506523 computer systems analysts
1 414716 software developers, applications
2 398546 computer programmers
3 177870 computer occupations, all other
4 83856 software developers, systems software
5 66873 management analysts
6 53540 financial analysts
7 52822 accountants and auditors
8 44236 mechanical engineers
9 40087 network and computer systems administrators
10 38982 database administrators
11 37737 market research analysts and marketing special...
12 36574 electronics engineers, except computer
13 34260 operations research analysts
14 34108 electrical engineers
15 33526 physicians and surgeons, all other
16 33387 computer software engineers, applications
17 27536 computer and information systems managers
18 26254 computer occupations, all other*
19 26159 medical scientists, except epidemiologists
20 21994 physical therapists
21 21245 biochemists and biophysicists
22 19370 industrial engineers
23 17426 computer systems analyst
24 17101 statisticians
25 16367 biological scientists, all other
26 16310 marketing managers
27 15970 civil engineers
28 15000 web developers
29 13367 internists, general
... ... ...
1555 1 urologists
1556 1 financial analysis
1557 1 commerical and industrial designers
1558 1 17-2051
1559 1 training and development manager
1560 1 job printers
1561 1 occupational health and safety specialists and...
1562 1 finance managers
1563 1 electonics engineers, except computer
1564 1 industrial designers
1565 1 software developers, applications, non r&d
1566 1 property real estate & community association mgrs
1567 1 business systems analysts
1568 1 secondary school teachers, except special and ...
1569 1 chemist
1570 1 health speciality teacher
1571 1 pediatrician
1572 1 computer systems engineers/arquitects
1573 1 medical and clincial laboratory technologists
1574 1 computer systems engineers/architect
1575 1 lodging manager
1576 1 new accounts clerks
1577 1 designer, all other
1578 1 atmospheric, earth, marine, & space sciences t...
1579 1 network & computer systems administrator
1580 1 foreign language and literature teachers, post...
1581 1 computer occuptations, all other
1582 1 engineering teachers postsecondary
1583 1 cashiers
1584 1 business intelligence anaylsts

1585 rows × 2 columns


In [12]:
reducedf['Name1'] = ''

In [13]:
reducedf


Out[13]:
Count SOC_NAME Name1
0 506523 computer systems analysts
1 414716 software developers, applications
2 398546 computer programmers
3 177870 computer occupations, all other
4 83856 software developers, systems software
5 66873 management analysts
6 53540 financial analysts
7 52822 accountants and auditors
8 44236 mechanical engineers
9 40087 network and computer systems administrators
10 38982 database administrators
11 37737 market research analysts and marketing special...
12 36574 electronics engineers, except computer
13 34260 operations research analysts
14 34108 electrical engineers
15 33526 physicians and surgeons, all other
16 33387 computer software engineers, applications
17 27536 computer and information systems managers
18 26254 computer occupations, all other*
19 26159 medical scientists, except epidemiologists
20 21994 physical therapists
21 21245 biochemists and biophysicists
22 19370 industrial engineers
23 17426 computer systems analyst
24 17101 statisticians
25 16367 biological scientists, all other
26 16310 marketing managers
27 15970 civil engineers
28 15000 web developers
29 13367 internists, general
... ... ... ...
1555 1 urologists
1556 1 financial analysis
1557 1 commerical and industrial designers
1558 1 17-2051
1559 1 training and development manager
1560 1 job printers
1561 1 occupational health and safety specialists and...
1562 1 finance managers
1563 1 electonics engineers, except computer
1564 1 industrial designers
1565 1 software developers, applications, non r&d
1566 1 property real estate & community association mgrs
1567 1 business systems analysts
1568 1 secondary school teachers, except special and ...
1569 1 chemist
1570 1 health speciality teacher
1571 1 pediatrician
1572 1 computer systems engineers/arquitects
1573 1 medical and clincial laboratory technologists
1574 1 computer systems engineers/architect
1575 1 lodging manager
1576 1 new accounts clerks
1577 1 designer, all other
1578 1 atmospheric, earth, marine, & space sciences t...
1579 1 network & computer systems administrator
1580 1 foreign language and literature teachers, post...
1581 1 computer occuptations, all other
1582 1 engineering teachers postsecondary
1583 1 cashiers
1584 1 business intelligence anaylsts

1585 rows × 3 columns


In [14]:
reducedf.iloc[3]['Count'] #example of accessing a location


Out[14]:
177870

In [15]:
%%timeit
for index, row in reducedf.iterrows():
    names = row['SOC_NAME'].split(",")
    if(names[0].endswith('*')):
        reducedf.set_value([index],['Name1'],(names[0][:-1]))
    if not (names[0].endswith('s')):
        reducedf.set_value([index],['Name1'],(names[0]+'s'))
    else:
        reducedf.set_value([index],['Name1'],names[0])


1 loop, best of 3: 919 ms per loop

In [16]:
reducedf


Out[16]:
Count SOC_NAME Name1
0 506523 computer systems analysts computer systems analysts
1 414716 software developers, applications software developers
2 398546 computer programmers computer programmers
3 177870 computer occupations, all other computer occupations
4 83856 software developers, systems software software developers
5 66873 management analysts management analysts
6 53540 financial analysts financial analysts
7 52822 accountants and auditors accountants and auditors
8 44236 mechanical engineers mechanical engineers
9 40087 network and computer systems administrators network and computer systems administrators
10 38982 database administrators database administrators
11 37737 market research analysts and marketing special... market research analysts and marketing special...
12 36574 electronics engineers, except computer electronics engineers
13 34260 operations research analysts operations research analysts
14 34108 electrical engineers electrical engineers
15 33526 physicians and surgeons, all other physicians and surgeons
16 33387 computer software engineers, applications computer software engineers
17 27536 computer and information systems managers computer and information systems managers
18 26254 computer occupations, all other* computer occupations
19 26159 medical scientists, except epidemiologists medical scientists
20 21994 physical therapists physical therapists
21 21245 biochemists and biophysicists biochemists and biophysicists
22 19370 industrial engineers industrial engineers
23 17426 computer systems analyst computer systems analysts
24 17101 statisticians statisticians
25 16367 biological scientists, all other biological scientists
26 16310 marketing managers marketing managers
27 15970 civil engineers civil engineers
28 15000 web developers web developers
29 13367 internists, general internists
... ... ... ...
1555 1 urologists urologists
1556 1 financial analysis financial analysis
1557 1 commerical and industrial designers commerical and industrial designers
1558 1 17-2051 17-2051s
1559 1 training and development manager training and development managers
1560 1 job printers job printers
1561 1 occupational health and safety specialists and... occupational health and safety specialists and...
1562 1 finance managers finance managers
1563 1 electonics engineers, except computer electonics engineers
1564 1 industrial designers industrial designers
1565 1 software developers, applications, non r&d software developers
1566 1 property real estate & community association mgrs property real estate & community association mgrs
1567 1 business systems analysts business systems analysts
1568 1 secondary school teachers, except special and ... secondary school teachers
1569 1 chemist chemists
1570 1 health speciality teacher health speciality teachers
1571 1 pediatrician pediatricians
1572 1 computer systems engineers/arquitects computer systems engineers/arquitects
1573 1 medical and clincial laboratory technologists medical and clincial laboratory technologists
1574 1 computer systems engineers/architect computer systems engineers/architects
1575 1 lodging manager lodging managers
1576 1 new accounts clerks new accounts clerks
1577 1 designer, all other designers
1578 1 atmospheric, earth, marine, & space sciences t... atmospherics
1579 1 network & computer systems administrator network & computer systems administrators
1580 1 foreign language and literature teachers, post... foreign language and literature teachers
1581 1 computer occuptations, all other computer occuptations
1582 1 engineering teachers postsecondary engineering teachers postsecondarys
1583 1 cashiers cashiers
1584 1 business intelligence anaylsts business intelligence anaylsts

1585 rows × 3 columns


In [17]:
cleandata1['SOC_NAME'].value_counts().count()


Out[17]:
1585

In [18]:
(cleandata1.loc[(cleandata1['SOC_NAME']=='software developers, appllications')]) #an example of a query


Out[18]:
Unnamed: 0 CASE_STATUS EMPLOYER_NAME SOC_NAME JOB_TITLE FULL_TIME_POSITION PREVAILING_WAGE YEAR WORKSITE lon lat
894328 894329 CERTIFIED meridiansoft, inc. software developers, appllications software developer applications Y 62421.0 2015.0 LEWIS CENTER, OHIO -83.010099 40.198388

This person messed up the SOC_NAME


In [19]:
reducedf['Name1'].value_counts()


Out[19]:
software developers                                 37
securities                                          14
secondary school teachers                           14
electronics engineers                               13
sales representatives                               13
computer occupations                                11
computer software engineers                         10
elementary school teachers                          10
computer systems analysts                           10
mechanical engineers                                 9
special education teachers                           8
education administrators                             7
medical scientists                                   7
computer programmers                                 7
middle school teachers                               7
electrical engineers                                 7
civil engineers                                      6
cooks                                                6
foreign language and literature teachers             6
computer network architects                          5
dentists                                             5
computer hardware engineers                          5
atmospherics                                         5
meetings                                             4
purchasing agents                                    4
biological scientists                                4
healthcare practitioners and technical workers       4
educationals                                         4
internists                                           4
engineers                                            4
                                                    ..
farm and home management advisors                    1
elementary teachers                                  1
orthotists and prosthetists                          1
first-line supervisors of retail sales workers       1
softwware developers                                 1
network and systems administrators                   1
it project managers                                  1
interpreters and translators                         1
agricultural and food scientists                     1
psychologists                                        1
foreign language/lit. teachers                       1
emergency medical technicians and paramedics         1
public relations and fundraising managers            1
cardiovascular technologists and technicians         1
welders                                              1
occupational therapy assistants                      1
nursing instructors and teachers                     1
floral designers                                     1
residential advisors                                 1
machine feeders and offbearers                       1
family practice physicians                           1
solutions architects                                 1
application programmers                              1
mental health and substance abuse social workers     1
psychology teachers                                  1
supply chain managers                                1
title examiners                                      1
painters and illustrators                            1
jewelers                                             1
financial advisor/accountants                        1
Name: Name1, dtype: int64

In [20]:
reducedf['Name1'].value_counts().count()


Out[20]:
1134

We have now cut the number of names in half from the original number.


In [21]:
reducedf.sort_values(['Name1'])


Out[21]:
Count SOC_NAME Name1
1370 1 13-2011.01 13-2011.01s
1109 2 15-1121 15-1121s
1189 1 15-1132 15-1132s
1326 1 15-1199.01 sw quality assurance engnrs & testers 15-1199.01 sw quality assurance engnrs & testers
1429 1 15-1199.01 15-1199.01s
1337 1 15-1199.08, business intelligence analysts 15-1199.08s
1558 1 17-2051 17-2051s
1472 1 17-2072 17-2072s
1340 1 27-3031 27-3031s
1509 1 29-1064.00-obstetricians and gynecologists 29-1064.00-obstetricians and gynecologists
1426 1 <font><font>carpinteros</font></font> <font><font>carpinteros</font></font>s
1055 2 able seamen able seamens
74 3958 accountants accountants
1046 2 accountant accountants
7 52822 accountants and auditors accountants and auditors
565 26 actors actors
80 3486 actuaries actuaries
1461 1 acupuncturists acupuncturists
893 4 adhesive bonding machine operators and tenders adhesive bonding machine operators and tenders
823 6 administrative law judges, adjudicators, and h... administrative law judges
152 1361 administrative services managers administrative services managers
323 230 adult basic and secondary education and litera... adult basic and secondary education and litera...
395 116 adult basic and secondary education and literacy adult basic and secondary education and literacys
1057 2 adult basic second educ and lit teach and instruc adult basic second educ and lit teach and inst...
485 50 adult literacy, remedial education, and ged te... adult literacys
1484 1 advertising and promortions managers advertising and promortions managers
94 2925 advertising and promotions managers advertising and promotions managers
1164 2 advertising and promotions manager advertising and promotions managers
342 195 advertising sales agents advertising sales agents
351 179 aerospace engineering and operations technicians aerospace engineering and operations technicians
... ... ... ...
1555 1 urologists urologists
1113 2 ushers, lobby attendants, and ticket takers ushers
422 90 validation engineers validation engineers
902 4 validation engineer validation engineers
113 2398 veterinarians veterinarians
600 19 veterinary assistants and laboratory animal ca... veterinary assistants and laboratory animal ca...
592 20 veterinary assistants and laboratory animal veterinary assistants and laboratory animals
426 87 veterinary technologists and technicians veterinary technologists and technicians
1478 1 vice president of investor relations vice president of investor relations
981 3 video game designers video game designers
989 3 vocational education teachers, middle school vocational education teachers
331 215 vocational education teachers, postsecondary vocational education teachers
632 16 vocational education teachers, secondary school vocational education teachers
493 47 vocational education teachers postsecondary vocational education teachers postsecondarys
636 16 watch repairers watch repairers
888 4 water and wastewater treatment plant and syste... water and wastewater treatment plant and syste...
956 3 water/wastewater engineers water/wastewater engineers
437 81 web administrators web administrators
1154 2 web developer web developers
28 15000 web developers web developers
1032 2 weighers, measurers, checkers, and samplers, weighers
634 16 welders, cutters, solderers, and brazers welders
1467 1 welding, soldering, and brazing machine setter... weldings
1437 1 welding, soldering, and brazing machine setters, weldings
205 746 wholesale and retail buyers, except farm products wholesale and retail buyers
1203 1 wind energy engineers wind energy engineers
1513 1 wind turbine service technicians wind turbine service technicians
714 10 woodworkers, all other woodworkers
181 981 writers and authors writers and authors
237 557 zoologists and wildlife biologists zoologists and wildlife biologists

1585 rows × 3 columns


In [22]:
reducedf['Name2'] = ""

In [23]:
%%timeit
regex = re.compile('[^a-z\s]')

for index, row in reducedf.iterrows():
    reducedf.set_value([index],['Name2'],(regex.sub('', row['Name1'])))


1 loop, best of 3: 918 ms per loop

In [24]:
reducedf.sort_values(['Name1'])


Out[24]:
Count SOC_NAME Name1 Name2
1370 1 13-2011.01 13-2011.01s s
1109 2 15-1121 15-1121s s
1189 1 15-1132 15-1132s s
1326 1 15-1199.01 sw quality assurance engnrs & testers 15-1199.01 sw quality assurance engnrs & testers sw quality assurance engnrs testers
1429 1 15-1199.01 15-1199.01s s
1337 1 15-1199.08, business intelligence analysts 15-1199.08s s
1558 1 17-2051 17-2051s s
1472 1 17-2072 17-2072s s
1340 1 27-3031 27-3031s s
1509 1 29-1064.00-obstetricians and gynecologists 29-1064.00-obstetricians and gynecologists obstetricians and gynecologists
1426 1 <font><font>carpinteros</font></font> <font><font>carpinteros</font></font>s fontfontcarpinterosfontfonts
1055 2 able seamen able seamens able seamens
74 3958 accountants accountants accountants
1046 2 accountant accountants accountants
7 52822 accountants and auditors accountants and auditors accountants and auditors
565 26 actors actors actors
80 3486 actuaries actuaries actuaries
1461 1 acupuncturists acupuncturists acupuncturists
893 4 adhesive bonding machine operators and tenders adhesive bonding machine operators and tenders adhesive bonding machine operators and tenders
823 6 administrative law judges, adjudicators, and h... administrative law judges administrative law judges
152 1361 administrative services managers administrative services managers administrative services managers
323 230 adult basic and secondary education and litera... adult basic and secondary education and litera... adult basic and secondary education and litera...
395 116 adult basic and secondary education and literacy adult basic and secondary education and literacys adult basic and secondary education and literacys
1057 2 adult basic second educ and lit teach and instruc adult basic second educ and lit teach and inst... adult basic second educ and lit teach and inst...
485 50 adult literacy, remedial education, and ged te... adult literacys adult literacys
1484 1 advertising and promortions managers advertising and promortions managers advertising and promortions managers
94 2925 advertising and promotions managers advertising and promotions managers advertising and promotions managers
1164 2 advertising and promotions manager advertising and promotions managers advertising and promotions managers
342 195 advertising sales agents advertising sales agents advertising sales agents
351 179 aerospace engineering and operations technicians aerospace engineering and operations technicians aerospace engineering and operations technicians
... ... ... ... ...
1555 1 urologists urologists urologists
1113 2 ushers, lobby attendants, and ticket takers ushers ushers
422 90 validation engineers validation engineers validation engineers
902 4 validation engineer validation engineers validation engineers
113 2398 veterinarians veterinarians veterinarians
600 19 veterinary assistants and laboratory animal ca... veterinary assistants and laboratory animal ca... veterinary assistants and laboratory animal ca...
592 20 veterinary assistants and laboratory animal veterinary assistants and laboratory animals veterinary assistants and laboratory animals
426 87 veterinary technologists and technicians veterinary technologists and technicians veterinary technologists and technicians
1478 1 vice president of investor relations vice president of investor relations vice president of investor relations
981 3 video game designers video game designers video game designers
989 3 vocational education teachers, middle school vocational education teachers vocational education teachers
331 215 vocational education teachers, postsecondary vocational education teachers vocational education teachers
632 16 vocational education teachers, secondary school vocational education teachers vocational education teachers
493 47 vocational education teachers postsecondary vocational education teachers postsecondarys vocational education teachers postsecondarys
636 16 watch repairers watch repairers watch repairers
888 4 water and wastewater treatment plant and syste... water and wastewater treatment plant and syste... water and wastewater treatment plant and syste...
956 3 water/wastewater engineers water/wastewater engineers waterwastewater engineers
437 81 web administrators web administrators web administrators
1154 2 web developer web developers web developers
28 15000 web developers web developers web developers
1032 2 weighers, measurers, checkers, and samplers, weighers weighers
634 16 welders, cutters, solderers, and brazers welders welders
1467 1 welding, soldering, and brazing machine setter... weldings weldings
1437 1 welding, soldering, and brazing machine setters, weldings weldings
205 746 wholesale and retail buyers, except farm products wholesale and retail buyers wholesale and retail buyers
1203 1 wind energy engineers wind energy engineers wind energy engineers
1513 1 wind turbine service technicians wind turbine service technicians wind turbine service technicians
714 10 woodworkers, all other woodworkers woodworkers
181 981 writers and authors writers and authors writers and authors
237 557 zoologists and wildlife biologists zoologists and wildlife biologists zoologists and wildlife biologists

1585 rows × 4 columns


In [25]:
reducedf['Name2'].value_counts().count()


Out[25]:
1118

Striping charactors did not help much.

At this point investigated a spellchecker in python. Was not able to get something to work within enviroment.

I am now going to consider removing entries that are unique, with a count of one.


In [26]:
dfName2Check = pd.DataFrame({'Name2': reducedf['Name2'].value_counts().index, 'Count':reducedf['Name2'].value_counts().values})

In [27]:
dfName2Check


Out[27]:
Count Name2
0 37 software developers
1 14 securities
2 14 secondary school teachers
3 13 sales representatives
4 13 electronics engineers
5 11 computer occupations
6 10 computer software engineers
7 10 elementary school teachers
8 10 computer systems analysts
9 9 mechanical engineers
10 8 s
11 8 special education teachers
12 7 education administrators
13 7 medical scientists
14 7 electrical engineers
15 7 computer programmers
16 7 middle school teachers
17 6 foreign language and literature teachers
18 6 cooks
19 6 civil engineers
20 5 dentists
21 5 computer hardware engineers
22 5 computer network architects
23 5 atmospherics
24 4 electrical and electronics repairers
25 4 compensations
26 4 business operations specialists
27 4 propertys
28 4 criminal justice and law enforcement teachers
29 4 health diagnosing and treating practitioners
... ... ...
1088 1 software develpers
1089 1 mental health and substance abuse social workers
1090 1 nursing assistants
1091 1 mdl schl teachers
1092 1 marine engineers and naval architects
1093 1 market research analysts marketing specialists
1094 1 orthotists and prosthetists
1095 1 softwware developers
1096 1 network and systems administrators
1097 1 it project managers
1098 1 interpreters and translators
1099 1 agricultural and food scientists
1100 1 psychologists
1101 1 respiratory therapy technicians
1102 1 firstline supervisorsmanagers of nonretail sales
1103 1 emergency medical technicians and paramedics
1104 1 public relations and fundraising managers
1105 1 cardiovascular technologists and technicians
1106 1 welders
1107 1 occupational therapy assistants
1108 1 nursing instructors and teachers
1109 1 floral designers
1110 1 residential advisors
1111 1 fundraisers
1112 1 machine feeders and offbearers
1113 1 firstline supervisorsmanagers
1114 1 family practice physicians
1115 1 solutions architects
1116 1 application programmers
1117 1 freight and cargo inspectors

1118 rows × 2 columns

Theres a problem, I am not getting a real representation of the occurence of names in the data. I now need to do something where I can get the actual number of occurences.


In [28]:
cleandata1


Out[28]:
Unnamed: 0 CASE_STATUS EMPLOYER_NAME SOC_NAME JOB_TITLE FULL_TIME_POSITION PREVAILING_WAGE YEAR WORKSITE lon lat
0 1 CERTIFIED-WITHDRAWN university of michigan biochemists and biophysicists postdoctoral research fellow N 36067.00 2016.0 ANN ARBOR, MICHIGAN -83.743038 42.280826
1 2 CERTIFIED-WITHDRAWN goodman networks, inc. chief executives chief operating officer Y 242674.00 2016.0 PLANO, TEXAS -96.698886 33.019843
2 3 CERTIFIED-WITHDRAWN ports america group, inc. chief executives chief process officer Y 193066.00 2016.0 JERSEY CITY, NEW JERSEY -74.077642 40.728158
3 4 CERTIFIED-WITHDRAWN gates corporation, a wholly-owned subsidiary o... chief executives regional presiden, americas Y 220314.00 2016.0 DENVER, COLORADO -104.990251 39.739236
4 5 WITHDRAWN peabody investments corp. chief executives president mongolia and india Y 157518.40 2016.0 ST. LOUIS, MISSOURI -90.199404 38.627003
5 6 CERTIFIED-WITHDRAWN burger king corporation chief executives executive v p, global development and presiden... Y 225000.00 2016.0 MIAMI, FLORIDA -80.191790 25.761680
6 7 CERTIFIED-WITHDRAWN bt and mk energy and commodities chief executives chief operating officer Y 91021.00 2016.0 HOUSTON, TEXAS -95.369803 29.760427
7 8 CERTIFIED-WITHDRAWN globo mobile technologies, inc. chief executives chief operations officer Y 150000.00 2016.0 SAN JOSE, CALIFORNIA -121.886329 37.338208
8 9 CERTIFIED-WITHDRAWN esi companies inc. chief executives president Y 127546.00 2016.0 MEMPHIS, TEXAS NaN NaN
9 10 WITHDRAWN lessard international llc chief executives president Y 154648.00 2016.0 VIENNA, VIRGINIA -77.265260 38.901222
10 11 CERTIFIED-WITHDRAWN h.j. heinz company chief executives chief information officer, heinz north america Y 182978.00 2016.0 PITTSBURGH, PENNSYLVANIA -79.995886 40.440625
11 12 CERTIFIED-WITHDRAWN dow corning corporation chief executives vice president and chief human resources officer Y 163717.00 2016.0 MIDLAND, MICHIGAN -84.247212 43.615583
12 13 CERTIFIED-WITHDRAWN acushnet company chief executives treasurer and coo Y 203860.80 2016.0 FAIRHAVEN, MASSACHUSETTS NaN NaN
13 14 CERTIFIED-WITHDRAWN biocair, inc. chief executives chief commercial officer Y 252637.00 2016.0 MIAMI, FLORIDA -80.191790 25.761680
14 15 CERTIFIED-WITHDRAWN newmont mining corporation chief executives board member Y 105914.00 2016.0 GREENWOOD VILLAGE, COLORADO -104.950814 39.617210
15 16 CERTIFIED-WITHDRAWN vricon, inc. chief executives chief financial officer Y 153046.00 2016.0 STERLING, VIRGINIA -77.429130 39.006699
16 17 CERTIFIED-WITHDRAWN cardiac science corporation financial managers vice president of finance Y 90834.00 2016.0 WAUKESHA, WISCONSIN -88.231481 43.011678
17 18 CERTIFIED-WITHDRAWN westfield corporation chief executives general manager, operations Y 164050.00 2016.0 LOS ANGELES, CALIFORNIA -118.243685 34.052234
18 19 CERTIFIED quicklogix llc chief executives ceo Y 187200.00 2016.0 SANTA CLARA, CALIFORNIA -121.955236 37.354108
19 20 CERTIFIED mcchrystal group, llc chief executives president, northeast region Y 241842.00 2016.0 ALEXANDRIA, VIRGINIA -77.046921 38.804835
20 21 CERTIFIED-WITHDRAWN cuddle barn, inc. chief executives chief operating officer (coo) Y 117998.00 2016.0 COMMERCE, CALIFORNIA -118.159793 34.000569
21 22 CERTIFIED-WITHDRAWN westfield corporation chief executives general manager, operations Y 164050.00 2016.0 LOS ANGELES, CALIFORNIA -118.243685 34.052234
22 23 CERTIFIED lomics, llc chief executives ceo Y 99986.00 2016.0 SAN DIEGO, CALIFORNIA -117.161084 32.715738
23 24 CERTIFIED uc university high school education inc. chief executives chief financial officer Y 99986.00 2016.0 CHULA VISTA, CALIFORNIA -117.084196 32.640054
24 25 CERTIFIED-WITHDRAWN vms communications llc chief executives chief operating officer Y 159370.00 2016.0 MIAMI, FLORIDA -80.191790 25.761680
25 26 CERTIFIED quicklogix, inc. chief executives ceo Y 187200.00 2016.0 SANTA CLARA, CALIFORNIA -121.955236 37.354108
26 27 CERTIFIED-WITHDRAWN foodessentials corporation chief executives chief executive officer Y 130853.00 2016.0 CHICAGO, ILLINOIS -87.629798 41.878114
27 28 CERTIFIED hello inc. chief executives chief business officer Y 215862.00 2016.0 SAN FRANCISCO, CALIFORNIA -122.419415 37.774929
28 29 CERTIFIED umbel corp chief executives vice president of engineering Y 192088.00 2016.0 AUSTIN, TEXAS -97.743061 30.267153
29 30 CERTIFIED perspectives of freedom foundation, inc chief executives executive director Y 95295.98 2016.0 WESTON, FLORIDA -80.399775 26.100365
... ... ... ... ... ... ... ... ... ... ... ...
3002428 3002429 WITHDRAWN avant healthcare professionals physical therapists physical therapist Y 53601.60 2011.0 LEBANON, PENNSYLVANIA NaN NaN
3002429 3002430 WITHDRAWN avant healthcare professionals physical therapists physical therapist Y 53601.60 2011.0 LEBANON, PENNSYLVANIA NaN NaN
3002430 3002431 WITHDRAWN trisync technologies, inc. computer systems analysts computer system analyst Y 55245.00 2011.0 EDISON, NEW JERSEY -74.412095 40.518715
3002431 3002432 WITHDRAWN at last sportswear inc. computer support specialists computer support specialist Y 36837.00 2011.0 SECAUCUS, NEW JERSEY -74.056530 40.789545
3002432 3002433 WITHDRAWN trisync technologies, inc. computer systems analysts computer system analyst Y 55245.00 2011.0 EDISON, NEW JERSEY -74.412095 40.518715
3002433 3002434 WITHDRAWN the university of texas southwestern medical c... biochemists and biophysicists instructor Y 36795.00 2011.0 DALLAS, TEXAS -96.796988 32.776664
3002434 3002435 WITHDRAWN trisync technologies, inc. computer systems analysts computer system analyst Y 55245.00 2011.0 EDISON, NEW JERSEY -74.412095 40.518715
3002435 3002436 WITHDRAWN marlabs, inc computer programmers programmer/analyst Y 77730.00 2011.0 DURHAM, NORTH CAROLINA -78.898619 35.994033
3002436 3002437 WITHDRAWN xtron software services, inc. computer programmers computer programmers Y 89232.00 2011.0 SANTA CLARA, CALIFORNIA -121.955236 37.354108
3002437 3002438 WITHDRAWN university of mississippi medical center health specialties teachers, postsecondary assistant professor of anesthesiology Y 34510.00 2011.0 JACKSON, MISSISSIPPI -90.184810 32.298757
3002438 3002439 WITHDRAWN canvas infotech, inc. database administrators data analyst Y 53082.00 2011.0 PLEASANTON, CALIFORNIA -121.874679 37.662431
3002439 3002440 WITHDRAWN new york university biological scientists, all other adjunct associate professor Y 37336.00 2011.0 NEW YORK, NEW YORK -74.005941 40.712784
3002440 3002441 WITHDRAWN oracle america, inc. software developers, applications software engineer (software developer 2) Y 64800.00 2011.0 REDWOOD SHORES, CALIFORNIA -122.245536 37.536413
3002441 3002442 WITHDRAWN b & d dental corp. materials scientists materials scientist N 70553.60 2011.0 WEST VALLEY, UTAH -112.001050 40.691613
3002442 3002443 WITHDRAWN medtech staffing & solutions, inc market research analysts and marketing special... market research analyst Y 33800.00 2011.0 AKRON, OHIO -81.519005 41.081445
3002443 3002444 WITHDRAWN larsen & toubro limited commercial and industrial designers design engineer Y 59800.00 2011.0 CHELMSFORD, MASSACHUSETTS -71.367284 42.599814
3002444 3002445 WITHDRAWN larsen & toubro limited commercial and industrial designers design engineer Y 59800.00 2011.0 CHELMSFORD, MASSACHUSETTS -71.367284 42.599814
3002445 3002446 NaN NaN NaN NaN NaN NaN NaN BERKLEY HEIGHTS, NEW JERSEY -74.431052 40.680873
3002446 3002447 NaN NaN NaN NaN NaN NaN NaN SCHENECTADY , NEW YORK -73.939569 42.814243
3002447 3002448 NaN NaN NaN NaN NaN NaN NaN MOUTAIN VIEW, CALIFORNIA -122.083851 37.386052
3002448 3002449 NaN NaN NaN NaN NaN NaN NaN ST.PAUL, MINNESOTA -93.089958 44.953703
3002449 3002450 NaN NaN NaN NaN NaN NaN NaN NEW TOWN, PENNSYLVANIA -74.932260 40.228337
3002450 3002451 NaN NaN NaN NaN NaN NaN NaN WESTMINISTER, COLORADO -105.037205 39.836653
3002451 3002452 NaN NaN NaN NaN NaN NaN NaN FREEMONT, CALIFORNIA -121.988572 37.548270
3002452 3002453 NaN NaN NaN NaN NaN NaN NaN LAVERGNE, TENNESSEE -86.581939 36.015618
3002453 3002454 NaN NaN NaN NaN NaN NaN NaN NYC, NEW YORK -74.005941 40.712784
3002454 3002455 NaN NaN NaN NaN NaN NaN NaN SOUTH LAKE, TEXAS -97.134178 32.941236
3002455 3002456 NaN NaN NaN NaN NaN NaN NaN CLINTON, NEW JERSEY -74.909890 40.636768
3002456 3002457 NaN NaN NaN NaN NaN NaN NaN OWINGS MILL, MARYLAND -76.780253 39.419550
3002457 3002458 NaN NaN NaN NaN NaN NaN NaN ALTANTA, GEORGIA -84.387982 33.748995

3002458 rows × 11 columns

Going to go back to this df and create a copy. Will then over write the Soc_Name with the reduced name I have.


In [29]:
cleandata2 = cleandata1.copy()

In [31]:
%%time
name = cleandata2.iloc[3002440]['SOC_NAME'] #example of accessing a location
print(name)
newname = reducedf.loc[(reducedf['SOC_NAME']==name)]
newname1 = newname.iloc[0]['Name2']
print(newname1)


software developers, applications
software developers
Wall time: 2.52 ms

Test code to test algorithm

At this point of development, I have switched to running the python notebook locally. I am having trouble with how slow the cloud service is, and I am not using spark yet.


In [38]:
%%timeit
errormap = defaultdict(list)

for index, row in cleandata2.iterrows():
    name = row['SOC_NAME']
    newname = reducedf.loc[(reducedf['SOC_NAME']==name)]
    try:
        newname1 = newname.iloc[0]['Name2']
        cleandata2.set_value([index],['SOC_NAME'],newname1)
    except:
        quicklist = ["Something went wrong", name, newname]
        errormap[index].append(quicklist)
    break


1 loop, best of 3: 1.03 s per loop

Here, we try to apply what we have reduced to the dataframe. We will see if we have made any progress, and what we can do next.

However, I have a very VERY large problem. The logic to use the reduced names, takes 1 s. With about 3000000 rows, that would be about 35 days.

I have no idea how to get around this right now.

I think I might be able to do this, by converting the pandas dataframe into a spark rdd. From there I could user map and map reduce functions.

But I would need a lot more time in order to do this.

At this point. I have spent many hours on this.

I feel very defeated.

I am now going to see if there is some way I can streamline the algoritm.


In [48]:
%%timeit
for index, row in cleandata2.iterrows():
    try:
        cleandata2.set_value([index],['SOC_NAME'],reducedf.loc[(reducedf['SOC_NAME']==row['SOC_NAME'])].iloc[0]['Name2'])
    except:
        pass
    break


1 loop, best of 3: 997 ms per loop

Very little gains. I don't know what to do. We may have to switch over in entire to rapidminer.


In [37]:
cleandata2['SOC_NAME'].value_counts().count()
cleandata2['SOC_NAME'].value_counts()


Out[37]:
computer systems analysts                                             506523
software developers, applications                                     414716
computer programmers                                                  398546
computer occupations, all other                                       177870
software developers, systems software                                  83856
management analysts                                                    66873
financial analysts                                                     53540
accountants and auditors                                               52822
mechanical engineers                                                   44236
network and computer systems administrators                            40087
database administrators                                                38982
market research analysts and marketing specialists                     37737
electronics engineers, except computer                                 36574
operations research analysts                                           34260
electrical engineers                                                   34108
physicians and surgeons, all other                                     33526
computer software engineers, applications                              33387
computer and information systems managers                              27536
computer occupations, all other*                                       26254
medical scientists, except epidemiologists                             26159
physical therapists                                                    21994
biochemists and biophysicists                                          21245
industrial engineers                                                   19370
computer systems analyst                                               17426
statisticians                                                          17101
biological scientists, all other                                       16367
marketing managers                                                     16310
civil engineers                                                        15970
web developers                                                         15000
internists, general                                                    13367
                                                                       ...  
urologists                                                                 1
financial analysis                                                         1
commerical and industrial designers                                        1
17-2051                                                                    1
training and development manager                                           1
job printers                                                               1
occupational health and safety specialists and tec                         1
finance managers                                                           1
electonics engineers, except computer                                      1
industrial designers                                                       1
software developers, applications,  non r&d                                1
property real estate & community association mgrs                          1
business systems analysts                                                  1
secondary school teachers, except special and vocational education         1
chemist                                                                    1
health speciality teacher                                                  1
pediatrician                                                               1
computer systems engineers/arquitects                                      1
medical and clincial laboratory technologists                              1
computer systems engineers/architect                                       1
lodging manager                                                            1
new accounts clerks                                                        1
designer, all other                                                        1
atmospheric, earth, marine, & space sciences teach                         1
network & computer systems administrator                                   1
foreign language and literature teachers, post sec                         1
computer occuptations, all other                                           1
engineering teachers postsecondary                                         1
cashiers                                                                   1
business intelligence anaylsts                                             1
Name: SOC_NAME, dtype: int64

Debating what will happen if I remove all entries in the dataset with which a SOC_NAME that only occures once. How many entries will this remove? Can we ignore entries like this? What about entries with an occurence count of 2? 3? Do these entries matter? What do we do if they matter?

At this point, I will have to do as much as I can in python, and then switch over to rapidminer.

I have an idea on how I could go through the rows faster.


In [ ]:
%%timeit
num=3000000
i=1
while i < num:
    try:
        name = cleandata2.iloc[i]['SOC_NAME']
        newname = reducedf.loc[(reducedf['SOC_NAME']==name)].iloc[0]['Name2']
        #cleandata2.set_value(i,['SOC_NAME'], newname)
        i=i+1
    except:
        pass

its the setting of the values that takes the majority of the time.


In [ ]:


In [ ]:

Following code is for generating heat mape of lat and lon data


In [6]:
df_data_2


Out[6]:
Unnamed: 0 CASE_STATUS EMPLOYER_NAME SOC_NAME JOB_TITLE FULL_TIME_POSITION PREVAILING_WAGE YEAR WORKSITE lon lat
0 1 CERTIFIED-WITHDRAWN UNIVERSITY OF MICHIGAN BIOCHEMISTS AND BIOPHYSICISTS POSTDOCTORAL RESEARCH FELLOW N 36067.00 2016.0 ANN ARBOR, MICHIGAN -83.743038 42.280826
1 2 CERTIFIED-WITHDRAWN GOODMAN NETWORKS, INC. CHIEF EXECUTIVES CHIEF OPERATING OFFICER Y 242674.00 2016.0 PLANO, TEXAS -96.698886 33.019843
2 3 CERTIFIED-WITHDRAWN PORTS AMERICA GROUP, INC. CHIEF EXECUTIVES CHIEF PROCESS OFFICER Y 193066.00 2016.0 JERSEY CITY, NEW JERSEY -74.077642 40.728158
3 4 CERTIFIED-WITHDRAWN GATES CORPORATION, A WHOLLY-OWNED SUBSIDIARY O... CHIEF EXECUTIVES REGIONAL PRESIDEN, AMERICAS Y 220314.00 2016.0 DENVER, COLORADO -104.990251 39.739236
4 5 WITHDRAWN PEABODY INVESTMENTS CORP. CHIEF EXECUTIVES PRESIDENT MONGOLIA AND INDIA Y 157518.40 2016.0 ST. LOUIS, MISSOURI -90.199404 38.627003
5 6 CERTIFIED-WITHDRAWN BURGER KING CORPORATION CHIEF EXECUTIVES EXECUTIVE V P, GLOBAL DEVELOPMENT AND PRESIDEN... Y 225000.00 2016.0 MIAMI, FLORIDA -80.191790 25.761680
6 7 CERTIFIED-WITHDRAWN BT AND MK ENERGY AND COMMODITIES CHIEF EXECUTIVES CHIEF OPERATING OFFICER Y 91021.00 2016.0 HOUSTON, TEXAS -95.369803 29.760427
7 8 CERTIFIED-WITHDRAWN GLOBO MOBILE TECHNOLOGIES, INC. CHIEF EXECUTIVES CHIEF OPERATIONS OFFICER Y 150000.00 2016.0 SAN JOSE, CALIFORNIA -121.886329 37.338208
8 9 CERTIFIED-WITHDRAWN ESI COMPANIES INC. CHIEF EXECUTIVES PRESIDENT Y 127546.00 2016.0 MEMPHIS, TEXAS NaN NaN
9 10 WITHDRAWN LESSARD INTERNATIONAL LLC CHIEF EXECUTIVES PRESIDENT Y 154648.00 2016.0 VIENNA, VIRGINIA -77.265260 38.901222
10 11 CERTIFIED-WITHDRAWN H.J. HEINZ COMPANY CHIEF EXECUTIVES CHIEF INFORMATION OFFICER, HEINZ NORTH AMERICA Y 182978.00 2016.0 PITTSBURGH, PENNSYLVANIA -79.995886 40.440625
11 12 CERTIFIED-WITHDRAWN DOW CORNING CORPORATION CHIEF EXECUTIVES VICE PRESIDENT AND CHIEF HUMAN RESOURCES OFFICER Y 163717.00 2016.0 MIDLAND, MICHIGAN -84.247212 43.615583
12 13 CERTIFIED-WITHDRAWN ACUSHNET COMPANY CHIEF EXECUTIVES TREASURER AND COO Y 203860.80 2016.0 FAIRHAVEN, MASSACHUSETTS NaN NaN
13 14 CERTIFIED-WITHDRAWN BIOCAIR, INC. CHIEF EXECUTIVES CHIEF COMMERCIAL OFFICER Y 252637.00 2016.0 MIAMI, FLORIDA -80.191790 25.761680
14 15 CERTIFIED-WITHDRAWN NEWMONT MINING CORPORATION CHIEF EXECUTIVES BOARD MEMBER Y 105914.00 2016.0 GREENWOOD VILLAGE, COLORADO -104.950814 39.617210
15 16 CERTIFIED-WITHDRAWN VRICON, INC. CHIEF EXECUTIVES CHIEF FINANCIAL OFFICER Y 153046.00 2016.0 STERLING, VIRGINIA -77.429130 39.006699
16 17 CERTIFIED-WITHDRAWN CARDIAC SCIENCE CORPORATION FINANCIAL MANAGERS VICE PRESIDENT OF FINANCE Y 90834.00 2016.0 WAUKESHA, WISCONSIN -88.231481 43.011678
17 18 CERTIFIED-WITHDRAWN WESTFIELD CORPORATION CHIEF EXECUTIVES GENERAL MANAGER, OPERATIONS Y 164050.00 2016.0 LOS ANGELES, CALIFORNIA -118.243685 34.052234
18 19 CERTIFIED QUICKLOGIX LLC CHIEF EXECUTIVES CEO Y 187200.00 2016.0 SANTA CLARA, CALIFORNIA -121.955236 37.354108
19 20 CERTIFIED MCCHRYSTAL GROUP, LLC CHIEF EXECUTIVES PRESIDENT, NORTHEAST REGION Y 241842.00 2016.0 ALEXANDRIA, VIRGINIA -77.046921 38.804835
20 21 CERTIFIED-WITHDRAWN CUDDLE BARN, INC. CHIEF EXECUTIVES CHIEF OPERATING OFFICER (COO) Y 117998.00 2016.0 COMMERCE, CALIFORNIA -118.159793 34.000569
21 22 CERTIFIED-WITHDRAWN WESTFIELD CORPORATION CHIEF EXECUTIVES GENERAL MANAGER, OPERATIONS Y 164050.00 2016.0 LOS ANGELES, CALIFORNIA -118.243685 34.052234
22 23 CERTIFIED LOMICS, LLC CHIEF EXECUTIVES CEO Y 99986.00 2016.0 SAN DIEGO, CALIFORNIA -117.161084 32.715738
23 24 CERTIFIED UC UNIVERSITY HIGH SCHOOL EDUCATION INC. CHIEF EXECUTIVES CHIEF FINANCIAL OFFICER Y 99986.00 2016.0 CHULA VISTA, CALIFORNIA -117.084196 32.640054
24 25 CERTIFIED-WITHDRAWN VMS COMMUNICATIONS LLC CHIEF EXECUTIVES CHIEF OPERATING OFFICER Y 159370.00 2016.0 MIAMI, FLORIDA -80.191790 25.761680
25 26 CERTIFIED QUICKLOGIX, INC. CHIEF EXECUTIVES CEO Y 187200.00 2016.0 SANTA CLARA, CALIFORNIA -121.955236 37.354108
26 27 CERTIFIED-WITHDRAWN FOODESSENTIALS CORPORATION CHIEF EXECUTIVES CHIEF EXECUTIVE OFFICER Y 130853.00 2016.0 CHICAGO, ILLINOIS -87.629798 41.878114
27 28 CERTIFIED HELLO INC. CHIEF EXECUTIVES CHIEF BUSINESS OFFICER Y 215862.00 2016.0 SAN FRANCISCO, CALIFORNIA -122.419415 37.774929
28 29 CERTIFIED UMBEL CORP CHIEF EXECUTIVES VICE PRESIDENT OF ENGINEERING Y 192088.00 2016.0 AUSTIN, TEXAS -97.743061 30.267153
29 30 CERTIFIED PERSPECTIVES OF FREEDOM FOUNDATION, INC CHIEF EXECUTIVES EXECUTIVE DIRECTOR Y 95295.98 2016.0 WESTON, FLORIDA -80.399775 26.100365
... ... ... ... ... ... ... ... ... ... ... ...
3002428 3002429 WITHDRAWN AVANT HEALTHCARE PROFESSIONALS Physical Therapists PHYSICAL THERAPIST Y 53601.60 2011.0 LEBANON, PENNSYLVANIA NaN NaN
3002429 3002430 WITHDRAWN AVANT HEALTHCARE PROFESSIONALS Physical Therapists PHYSICAL THERAPIST Y 53601.60 2011.0 LEBANON, PENNSYLVANIA NaN NaN
3002430 3002431 WITHDRAWN TRISYNC TECHNOLOGIES, INC. Computer Systems Analysts COMPUTER SYSTEM ANALYST Y 55245.00 2011.0 EDISON, NEW JERSEY -74.412095 40.518715
3002431 3002432 WITHDRAWN AT LAST SPORTSWEAR INC. Computer Support Specialists COMPUTER SUPPORT SPECIALIST Y 36837.00 2011.0 SECAUCUS, NEW JERSEY -74.056530 40.789545
3002432 3002433 WITHDRAWN TRISYNC TECHNOLOGIES, INC. Computer Systems Analysts COMPUTER SYSTEM ANALYST Y 55245.00 2011.0 EDISON, NEW JERSEY -74.412095 40.518715
3002433 3002434 WITHDRAWN THE UNIVERSITY OF TEXAS SOUTHWESTERN MEDICAL C... Biochemists and Biophysicists INSTRUCTOR Y 36795.00 2011.0 DALLAS, TEXAS -96.796988 32.776664
3002434 3002435 WITHDRAWN TRISYNC TECHNOLOGIES, INC. Computer Systems Analysts COMPUTER SYSTEM ANALYST Y 55245.00 2011.0 EDISON, NEW JERSEY -74.412095 40.518715
3002435 3002436 WITHDRAWN MARLABS, INC Computer Programmers PROGRAMMER/ANALYST Y 77730.00 2011.0 DURHAM, NORTH CAROLINA -78.898619 35.994033
3002436 3002437 WITHDRAWN XTRON SOFTWARE SERVICES, INC. Computer Programmers COMPUTER PROGRAMMERS Y 89232.00 2011.0 SANTA CLARA, CALIFORNIA -121.955236 37.354108
3002437 3002438 WITHDRAWN UNIVERSITY OF MISSISSIPPI MEDICAL CENTER Health Specialties Teachers, Postsecondary ASSISTANT PROFESSOR OF ANESTHESIOLOGY Y 34510.00 2011.0 JACKSON, MISSISSIPPI -90.184810 32.298757
3002438 3002439 WITHDRAWN CANVAS INFOTECH, INC. Database Administrators DATA ANALYST Y 53082.00 2011.0 PLEASANTON, CALIFORNIA -121.874679 37.662431
3002439 3002440 WITHDRAWN NEW YORK UNIVERSITY Biological Scientists, All Other ADJUNCT ASSOCIATE PROFESSOR Y 37336.00 2011.0 NEW YORK, NEW YORK -74.005941 40.712784
3002440 3002441 WITHDRAWN ORACLE AMERICA, INC. Software Developers, Applications SOFTWARE ENGINEER (SOFTWARE DEVELOPER 2) Y 64800.00 2011.0 REDWOOD SHORES, CALIFORNIA -122.245536 37.536413
3002441 3002442 WITHDRAWN B & D DENTAL CORP. Materials Scientists MATERIALS SCIENTIST N 70553.60 2011.0 WEST VALLEY, UTAH -112.001050 40.691613
3002442 3002443 WITHDRAWN MEDTECH STAFFING & SOLUTIONS, INC Market Research Analysts and Marketing Special... MARKET RESEARCH ANALYST Y 33800.00 2011.0 AKRON, OHIO -81.519005 41.081445
3002443 3002444 WITHDRAWN LARSEN & TOUBRO LIMITED Commercial and Industrial Designers DESIGN ENGINEER Y 59800.00 2011.0 CHELMSFORD, MASSACHUSETTS -71.367284 42.599814
3002444 3002445 WITHDRAWN LARSEN & TOUBRO LIMITED Commercial and Industrial Designers DESIGN ENGINEER Y 59800.00 2011.0 CHELMSFORD, MASSACHUSETTS -71.367284 42.599814
3002445 3002446 NaN NaN NaN NaN NaN NaN NaN BERKLEY HEIGHTS, NEW JERSEY -74.431052 40.680873
3002446 3002447 NaN NaN NaN NaN NaN NaN NaN SCHENECTADY , NEW YORK -73.939569 42.814243
3002447 3002448 NaN NaN NaN NaN NaN NaN NaN MOUTAIN VIEW, CALIFORNIA -122.083851 37.386052
3002448 3002449 NaN NaN NaN NaN NaN NaN NaN ST.PAUL, MINNESOTA -93.089958 44.953703
3002449 3002450 NaN NaN NaN NaN NaN NaN NaN NEW TOWN, PENNSYLVANIA -74.932260 40.228337
3002450 3002451 NaN NaN NaN NaN NaN NaN NaN WESTMINISTER, COLORADO -105.037205 39.836653
3002451 3002452 NaN NaN NaN NaN NaN NaN NaN FREEMONT, CALIFORNIA -121.988572 37.548270
3002452 3002453 NaN NaN NaN NaN NaN NaN NaN LAVERGNE, TENNESSEE -86.581939 36.015618
3002453 3002454 NaN NaN NaN NaN NaN NaN NaN NYC, NEW YORK -74.005941 40.712784
3002454 3002455 NaN NaN NaN NaN NaN NaN NaN SOUTH LAKE, TEXAS -97.134178 32.941236
3002455 3002456 NaN NaN NaN NaN NaN NaN NaN CLINTON, NEW JERSEY -74.909890 40.636768
3002456 3002457 NaN NaN NaN NaN NaN NaN NaN OWINGS MILL, MARYLAND -76.780253 39.419550
3002457 3002458 NaN NaN NaN NaN NaN NaN NaN ALTANTA, GEORGIA -84.387982 33.748995

3002458 rows × 11 columns


In [3]:
lon = []
lon = df_data_2['lon'].values,df_data_2['lat'].values
lon


Out[3]:
(array([-83.7430378, -96.6988856, -74.0776417, ..., -74.90989  ,
        -76.7802528, -84.3879824]),
 array([ 42.2808256,  33.0198431,  40.7281575, ...,  40.6367682,
         39.4195499,  33.7489954]))

In [14]:
#for long, lat in df_data_2['lon'].values for in df_data_2['lat'].values
geo = [list(a) if a!=['nan', 'nan'] for a in zip(df_data_2['lon'].values,df_data_2['lat'].values)]


  File "<ipython-input-14-3d1e6af45427>", line 2
    geo = [list(a) if a!=['nan', 'nan'] for a in zip(df_data_2['lon'].values,df_data_2['lat'].values)]
                                          ^
SyntaxError: invalid syntax

In [13]:
geo


Out[13]:
[[-83.743037799999996, 42.2808256],
 [-96.698885599999997, 33.019843100000003],
 [-74.077641700000001, 40.728157500000002],
 [-104.990251, 39.739235799999996],
 [-90.199404200000004, 38.627002500000003],
 [-80.1917902, 25.7616798],
 [-95.369802799999988, 29.7604267],
 [-121.88632859999998, 37.338208200000004],
 [nan, nan],
 [-77.265260400000003, 38.901222499999996],
 [-79.995886400000003, 40.440624799999995],
 [-84.247211599999986, 43.615582500000002],
 [nan, nan],
 [-80.1917902, 25.7616798],
 [-104.95081409999999, 39.617210100000001],
 [-77.429129799999998, 39.006699299999994],
 [-88.231481299999999, 43.011678400000001],
 [-118.24368490000002, 34.052234200000001],
 [-121.95523559999999, 37.354107899999995],
 [-77.046921400000002, 38.804835499999996],
 [-118.1597929, 34.0005691],
 [-118.24368490000002, 34.052234200000001],
 [-117.1610838, 32.715738000000002],
 [-117.08419550000001, 32.6400541],
 [-80.1917902, 25.7616798],
 [-121.95523559999999, 37.354107899999995],
 [-87.62979820000001, 41.878113599999999],
 [-122.4194155, 37.774929499999999],
 [-97.743060799999995, 30.267153000000004],
 [-80.399774800000003, 26.100365399999998],
 [-77.036870700000009, 38.907192299999998],
 [-87.62979820000001, 41.878113599999999],
 [-78.825562099999999, 35.823483000000003],
 [-80.1917902, 25.7616798],
 [-71.441810099999998, 41.911012299999996],
 [-87.62979820000001, 41.878113599999999],
 [-87.62979820000001, 41.878113599999999],
 [-81.453449200000009, 41.605326599999998],
 [-88.17340209999999, 41.817807000000002],
 [-122.21706609999998, 47.482877600000002],
 [-84.55553470000001, 42.732534999999999],
 [-87.828954799999991, 42.127526700000004],
 [-84.294089900000003, 34.075376200000008],
 [-117.27114809999999, 32.991154999999999],
 [-74.005941299999989, 40.712783700000003],
 [nan, nan],
 [-74.185420899999997, 40.806754600000005],
 [-117.1610838, 32.715738000000002],
 [-121.98857190000001, 37.548269700000006],
 [-117.1610838, 32.715738000000002],
 [-95.824395599999988, 29.785785300000001],
 [-95.369802799999988, 29.7604267],
 [-84.198579000000009, 34.028925899999997],
 [-122.21706609999998, 47.482877600000002],
 [nan, nan],
 [nan, nan],
 [-121.98857190000001, 37.548269700000006],
 [nan, nan],
 [-97.497483799999998, 25.901747199999999],
 [-78.476678100000001, 38.029305900000004],
 [-122.4194155, 37.774929499999999],
 [-86.908065500000006, 40.425868600000001],
 [-76.612189299999997, 39.290384799999998],
 [-80.09420870000001, 26.934224600000004],
 [-80.09420870000001, 26.934224600000004],
 [-81.317844600000001, 28.758883299999997],
 [-84.549932699999999, 33.952601999999999],
 [-81.365624200000013, 28.661108899999999],
 [-78.476678100000001, 38.029305900000004],
 [-122.7140548, 38.440428999999995],
 [-77.008587599999998, 40.214256499999998],
 [-122.7140548, 38.440428999999995],
 [-96.948894499999994, 32.814017699999994],
 [-83.352709700000005, 42.368369999999999],
 [-73.756231700000001, 42.652579299999992],
 [-81.794810299999995, 26.142035800000002],
 [-81.379236500000005, 28.538335499999999],
 [-122.27111370000002, 37.804363700000003],
 [-117.1610838, 32.715738000000002],
 [-80.278105699999998, 25.857596300000001],
 [-80.278105699999998, 25.857596300000001],
 [-87.62979820000001, 41.878113599999999],
 [-87.62979820000001, 41.878113599999999],
 [-76.8620327, 38.968511200000002],
 [-90.577067499999998, 38.663108299999998],
 [-76.8620327, 38.968511200000002],
 [-80.355330199999997, 25.8195424],
 [-81.379236500000005, 28.538335499999999],
 [-80.1917902, 25.7616798],
 [-84.512019600000002, 39.103118200000004],
 [-118.28169299999999, 33.831674499999998],
 [nan, nan],
 [nan, nan],
 [-121.42522269999999, 37.739651299999998],
 [-71.144773200000003, 42.158432399999995],
 [-87.62979820000001, 41.878113599999999],
 [-115.13982959999998, 36.169941200000004],
 [-78.476678100000001, 38.029305900000004],
 [-82.719267099999996, 28.244176799999998],
 [nan, nan],
 [-82.285924699999995, 27.937801],
 [-118.40035630000001, 34.073620399999996],
 [-74.005941299999989, 40.712783700000003],
 [-80.296255500000001, 26.007764999999999],
 [-118.4911912, 34.019454299999992],
 [-87.62979820000001, 41.878113599999999],
 [-87.62979820000001, 41.878113599999999],
 [-77.357002799999989, 38.958630700000001],
 [nan, nan],
 [-72.251756900000004, 43.6422934],
 [-74.005941299999989, 40.712783700000003],
 [-80.1917902, 25.7616798],
 [-91.140319599999998, 30.458282899999997],
 [-74.005941299999989, 40.712783700000003],
 [-87.828954799999991, 42.127526700000004],
 [-85.587228600000003, 42.291706899999994],
 [-82.719267099999996, 28.244176799999998],
 [-74.005941299999989, 40.712783700000003],
 [-80.278105699999998, 25.857596300000001],
 [-84.387982399999999, 33.748995399999998],
 [-122.272747, 37.871592600000007],
 [-77.036870700000009, 38.907192299999998],
 [-118.30896240000001, 33.888348700000002],
 [-93.470786000000004, 44.854685600000003],
 [-93.470786000000004, 44.854685600000003],
 [-80.752607999999995, 32.216315999999999],
 [-80.843126699999999, 35.227086900000003],
 [-111.9260519, 33.494170399999994],
 [-122.272747, 37.871592600000007],
 [-122.20598329999999, 47.676892700000003],
 [-119.17705159999998, 34.197504799999997],
 [-71.228964099999999, 42.443037200000006],
 [-83.234102799999988, 42.687532300000001],
 [nan, nan],
 [nan, nan],
 [-71.058880099999996, 42.360082500000004],
 [nan, nan],
 [-74.005941299999989, 40.712783700000003],
 [-77.944710200000003, 34.225725500000003],
 [-74.005941299999989, 40.712783700000003],
 [-80.053374599999998, 26.715342400000001],
 [-122.33207079999998, 47.606209499999999],
 [-80.1917902, 25.7616798],
 [-77.429993899999999, 43.212285100000003],
 [-82.665099200000014, 28.034184700000001],
 [nan, nan],
 [-83.221873099999996, 42.473368799999996],
 [-104.990251, 39.739235799999996],
 [nan, nan],
 [-84.269644900000003, 36.010356099999996],
 [-80.25659499999999, 26.166971100000001],
 [-122.08385109999999, 37.386051700000003],
 [-117.1610838, 32.715738000000002],
 [-71.058880099999996, 42.360082500000004],
 [-85.179714199999992, 42.321152200000007],
 [-74.011653600000002, 40.893246900000001],
 [-115.13982959999998, 36.169941200000004],
 [-122.4442906, 47.252876799999996],
 [-95.369802799999988, 29.7604267],
 [-77.201370499999996, 39.143440600000005],
 [-84.514376099999993, 33.883992600000006],
 [-81.379236500000005, 28.538335499999999],
 [-80.1917902, 25.7616798],
 [-84.213530899999995, 33.941212700000001],
 [-122.27111370000002, 37.804363700000003],
 [-80.1917902, 25.7616798],
 [-77.036870700000009, 38.907192299999998],
 [-104.7091322, 40.423314200000007],
 [-80.1917902, 25.7616798],
 [-118.24368490000002, 34.052234200000001],
 [-97.195013799999998, 32.991234999999996],
 [-122.08385109999999, 37.386051700000003],
 [-74.005941299999989, 40.712783700000003],
 [-122.1817252, 37.452959800000002],
 [-75.165221500000001, 39.9525839],
 [-71.058880099999996, 42.360082500000004],
 [-117.086421, 33.119206800000001],
 [-122.4194155, 37.774929499999999],
 [-86.868889900000013, 35.925063700000003],
 [-90.577067499999998, 38.663108299999998],
 [-119.17705159999998, 34.197504799999997],
 [-117.79469420000001, 33.6839473],
 [-83.234102799999988, 42.687532300000001],
 [-119.17705159999998, 34.197504799999997],
 [-122.1817252, 37.452959800000002],
 [-83.352709700000005, 42.368369999999999],
 [-104.7091322, 40.423314200000007],
 [nan, nan],
 [nan, nan],
 [nan, nan],
 [-118.24368490000002, 34.052234200000001],
 [-83.149775099999999, 42.606409499999998],
 [-118.24368490000002, 34.052234200000001],
 [-157.8583333, 21.306944399999999],
 [-118.24368490000002, 34.052234200000001],
 [-86.908065500000006, 40.425868600000001],
 [-84.294089900000003, 34.075376200000008],
 [nan, nan],
 [-111.65853370000001, 40.233843799999995],
 [-80.579510999999997, 35.408751700000003],
 [-84.213530899999995, 33.941212700000001],
 [-111.9738304, 41.222999999999999],
 [-77.152757800000003, 39.0839973],
 [nan, nan],
 [-75.539787799999999, 39.739072100000001],
 [-80.1917902, 25.7616798],
 [-80.148379000000006, 25.981202399999997],
 [-80.148379000000006, 25.981202399999997],
 [-76.945530099999999, 38.955944200000005],
 [-104.990251, 39.739235799999996],
 [nan, nan],
 [-119.69819009999999, 34.420830500000001],
 [-64.703197700000004, 17.746639699999999],
 [-71.058880099999996, 42.360082500000004],
 [-95.235250099999988, 38.971668900000004],
 [-118.4911912, 34.019454299999992],
 [-79.995886400000003, 40.440624799999995],
 [-80.1917902, 25.7616798],
 [-118.24368490000002, 34.052234200000001],
 [-80.1917902, 25.7616798],
 [-80.137317400000015, 26.122438600000002],
 [-86.518604500000009, 35.982841200000003],
 [-86.518604500000009, 35.982841200000003],
 [-79.037738799999985, 43.0962143],
 [-86.518604500000009, 35.982841200000003],
 [-117.1610838, 32.715738000000002],
 [-71.058880099999996, 42.360082500000004],
 [-117.08419550000001, 32.6400541],
 [-86.518604500000009, 35.982841200000003],
 [-122.4194155, 37.774929499999999],
 [-81.099834200000004, 32.0835407],
 [nan, nan],
 [-86.158068, 39.768402999999999],
 [-95.235250099999988, 38.971668900000004],
 [-111.65853370000001, 40.233843799999995],
 [-111.65853370000001, 40.233843799999995],
 [-95.369802799999988, 29.7604267],
 [-111.65853370000001, 40.233843799999995],
 [-111.65853370000001, 40.233843799999995],
 [nan, nan],
 [-89.401230200000001, 43.073051700000008],
 [nan, nan],
 [-95.369802799999988, 29.7604267],
 [nan, nan],
 [-80.1917902, 25.7616798],
 [-118.4911912, 34.019454299999992],
 [nan, nan],
 [-111.65853370000001, 40.233843799999995],
 [-97.497483799999998, 25.901747199999999],
 [nan, nan],
 [-111.65853370000001, 40.233843799999995],
 [nan, nan],
 [-80.139212099999995, 25.956481199999999],
 [-86.158068, 39.768402999999999],
 [-85.668086299999999, 42.9633599],
 [-93.99939959999999, 44.163577500000002],
 [-96.948894499999994, 32.814017699999994],
 [-87.894522899999998, 42.698074900000002],
 [nan, nan],
 [-74.364612199999996, 40.714637599999996],
 [-77.475266700000006, 38.750948799999996],
 [-77.036870700000009, 38.907192299999998],
 [-80.1917902, 25.7616798],
 [-87.62979820000001, 41.878113599999999],
 [-84.387982399999999, 33.748995399999998],
 [-80.843126699999999, 35.227086900000003],
 [-87.62979820000001, 41.878113599999999],
 [-84.512019600000002, 39.103118200000004],
 [-117.1610838, 32.715738000000002],
 [-84.387982399999999, 33.748995399999998],
 [-87.62979820000001, 41.878113599999999],
 [nan, nan],
 [-91.140319599999998, 30.458282899999997],
 [-74.364612199999996, 40.714637599999996],
 [-87.62979820000001, 41.878113599999999],
 [-73.8201337, 41.055096899999995],
 [-84.512019600000002, 39.103118200000004],
 [-121.96237509999999, 37.235807799999996],
 [-80.1917902, 25.7616798],
 [-88.282566799999998, 42.035408399999994],
 [-118.24368490000002, 34.052234200000001],
 [-80.162824799999996, 25.693712999999999],
 [-75.514912800000005, 40.130382200000007],
 [-89.401230200000001, 43.073051700000008],
 [-74.58529200000001, 40.338254299999996],
 [-104.9719243, 39.8680412],
 [nan, nan],
 [-80.1917902, 25.7616798],
 [-71.152276499999999, 42.479261799999996],
 [-71.4161565, 42.279285999999999],
 [nan, nan],
 [-118.1445155, 34.147784899999998],
 [-81.655651000000006, 30.332183799999999],
 [-87.840625000000003, 42.258634200000003],
 [145.72978909999998, 15.151515300000002],
 [-84.387982399999999, 33.748995399999998],
 [nan, nan],
 [145.72978909999998, 15.151515300000002],
 [-118.24368490000002, 34.052234200000001],
 [-71.152276499999999, 42.479261799999996],
 [-72.927883499999993, 41.308273999999997],
 [-75.514912800000005, 40.130382200000007],
 [-74.005941299999989, 40.712783700000003],
 [-84.144637599999996, 34.002878600000003],
 [145.72978909999998, 15.151515300000002],
 [-93.99939959999999, 44.163577500000002],
 [-93.99939959999999, 44.163577500000002],
 [145.72978909999998, 15.151515300000002],
 [-83.9207392, 35.960638399999993],
 [-87.828954799999991, 42.127526700000004],
 [-77.036870700000009, 38.907192299999998],
 [-84.294089900000003, 34.075376200000008],
 [-77.036870700000009, 38.907192299999998],
 [-84.309939, 39.360058600000002],
 [-117.1610838, 32.715738000000002],
 [-70.255325900000003, 43.661470999999999],
 [-117.79469420000001, 33.6839473],
 [-74.005941299999989, 40.712783700000003],
 [-118.34062879999999, 33.835849200000006],
 [nan, nan],
 [-82.998794200000006, 39.961175500000003],
 [-84.398276299999992, 39.515057599999999],
 [nan, nan],
 [-81.379236500000005, 28.538335499999999],
 [-117.43504799999999, 34.092233499999999],
 [-80.1917902, 25.7616798],
 [-122.40774979999999, 37.654656000000003],
 [-88.320071499999997, 41.760584899999998],
 [-73.8201337, 41.055096899999995],
 [-71.058880099999996, 42.360082500000004],
 [-80.1917902, 25.7616798],
 [144.78786890000001, 13.497020499999998],
 [-95.750781499999988, 30.2093794],
 [-122.4194155, 37.774929499999999],
 [-74.005941299999989, 40.712783700000003],
 [-96.796987900000005, 32.776664199999999],
 [-77.094709200000011, 38.984652000000004],
 [-74.005941299999989, 40.712783700000003],
 [-122.4194155, 37.774929499999999],
 [-86.158068, 39.768402999999999],
 [145.72978909999998, 15.151515300000002],
 [-80.308661900000004, 25.908705600000001],
 [-122.32552539999999, 37.562991700000005],
 [-116.54529209999998, 33.830296099999998],
 [-84.294089900000003, 34.075376200000008],
 [-80.1917902, 25.7616798],
 [-95.369802799999988, 29.7604267],
 [-110.926479, 32.221742900000002],
 [-97.5164276, 35.467560200000001],
 [-94.208817199999999, 36.372853799999994],
 [-118.1445155, 34.147784899999998],
 [-95.369802799999988, 29.7604267],
 [nan, nan],
 [-122.4194155, 37.774929499999999],
 [-71.058880099999996, 42.360082500000004],
 [-118.3812562, 34.187044],
 [-77.036870700000009, 38.907192299999998],
 [-115.13982959999998, 36.169941200000004],
 [-110.926479, 32.221742900000002],
 [-122.4194155, 37.774929499999999],
 [-80.1289321, 26.368306400000002],
 [-90.587068599999995, 38.592553199999998],
 [nan, nan],
 [-80.130045499999994, 25.790654],
 [-117.3961564, 33.953348699999999],
 [-122.4442906, 47.252876799999996],
 [-122.4194155, 37.774929499999999],
 [-117.35059390000001, 33.158093300000004],
 [-87.62979820000001, 41.878113599999999],
 [nan, nan],
 [-73.944157900000008, 40.6781784],
 [-80.308661900000004, 25.908705600000001],
 [nan, nan],
 [-84.294089900000003, 34.075376200000008],
 [-95.634946299999996, 29.619678700000001],
 [-84.296312299999997, 33.774827500000001],
 [nan, nan],
 [-80.1917902, 25.7616798],
 [-80.1917902, 25.7616798],
 [-80.1917902, 25.7616798],
 [-80.1917902, 25.7616798],
 [-122.272747, 37.871592600000007],
 [-121.7568946, 36.910231000000003],
 [-80.355330199999997, 25.8195424],
 [-80.355330199999997, 25.8195424],
 [-80.1917902, 25.7616798],
 [nan, nan],
 [-105.27054560000001, 40.014985600000003],
 [-122.4194155, 37.774929499999999],
 [-97.134178299999988, 32.9412363],
 [-71.133711200000008, 42.353903799999998],
 [-88.266753400000013, 42.333354999999997],
 [-88.266753400000013, 42.333354999999997],
 [-88.266753400000013, 42.333354999999997],
 [-118.451357, 34.148971899999999],
 [nan, nan],
 [145.72978909999998, 15.151515300000002],
 [-80.355330199999997, 25.8195424],
 [-122.4194155, 37.774929499999999],
 [-118.7616764, 34.153339500000001],
 [-118.4164652, 33.919179900000003],
 [-104.990251, 39.739235799999996],
 [-104.95081409999999, 39.617210100000001],
 [-76.612189299999997, 39.290384799999998],
 [-118.4911912, 34.019454299999992],
 [-95.369802799999988, 29.7604267],
 [-80.1917902, 25.7616798],
 [-87.8664579, 42.193358099999998],
 [-95.369802799999988, 29.7604267],
 [-118.24368490000002, 34.052234200000001],
 [-118.24368490000002, 34.052234200000001],
 [-115.13982959999998, 36.169941200000004],
 [-104.990251, 39.739235799999996],
 [-87.844511900000001, 42.171136499999996],
 [-122.4194155, 37.774929499999999],
 [-87.571089799999996, 37.971559200000002],
 [-117.79469420000001, 33.6839473],
 [-87.8664579, 42.193358099999998],
 [-122.4194155, 37.774929499999999],
 [-87.844511900000001, 42.171136499999996],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-122.4786854, 48.751911200000002],
 [-111.83147240000001, 33.4151843],
 [-122.4194155, 37.774929499999999],
 [-122.4194155, 37.774929499999999],
 [-122.4194155, 37.774929499999999],
 [-115.13982959999998, 36.169941200000004],
 [-96.796987900000005, 32.776664199999999],
 [-75.369889499999999, 40.041599600000005],
 [-74.005941299999989, 40.712783700000003],
 [-96.796987900000005, 32.776664199999999],
 [-77.046921400000002, 38.804835499999996],
 [-118.30896609999998, 34.180839200000008],
 [-118.30896609999998, 34.180839200000008],
 [-84.387982399999999, 33.748995399999998],
 [-87.62979820000001, 41.878113599999999],
 [-117.66255090000001, 33.501693199999998],
 [nan, nan],
 [-77.036870700000009, 38.907192299999998],
 [-118.24368490000002, 34.052234200000001],
 [-85.587228600000003, 42.291706899999994],
 [-77.036870700000009, 38.907192299999998],
 [-118.24368490000002, 34.052234200000001],
 [-94.670791699999995, 38.982228200000002],
 [-122.4194155, 37.774929499999999],
 [-80.843126699999999, 35.227086900000003],
 [-117.1610838, 32.715738000000002],
 [-115.13982959999998, 36.169941200000004],
 [-87.741624599999994, 42.032402500000003],
 [-87.964507699999999, 41.850030199999999],
 [-81.655651000000006, 30.332183799999999],
 [-157.8583333, 21.306944399999999],
 [-74.005941299999989, 40.712783700000003],
 [-118.24368490000002, 34.052234200000001],
 [-110.926479, 32.221742900000002],
 [-87.62979820000001, 41.878113599999999],
 [-157.8583333, 21.306944399999999],
 [-77.036870700000009, 38.907192299999998],
 [-74.005941299999989, 40.712783700000003],
 [-87.741624599999994, 42.032402500000003],
 [-115.13982959999998, 36.169941200000004],
 [-81.872308400000009, 26.640628000000003],
 [-115.13982959999998, 36.169941200000004],
 [-97.743060799999995, 30.267153000000004],
 [-87.741624599999994, 42.032402500000003],
 [-87.62979820000001, 41.878113599999999],
 [-122.06518190000001, 37.910078300000002],
 [-122.48525069999999, 37.859093700000003],
 [-122.06518190000001, 37.910078300000002],
 [nan, nan],
 [-122.48525069999999, 37.859093700000003],
 [145.72978909999998, 15.151515300000002],
 [-118.40035630000001, 34.073620399999996],
 [145.72978909999998, 15.151515300000002],
 [-118.24368490000002, 34.052234200000001],
 [-105.27054560000001, 40.014985600000003],
 [-71.058880099999996, 42.360082500000004],
 [nan, nan],
 [-117.2919818, 33.0369867],
 [-80.588664600000001, 28.034462100000006],
 [-98.493628200000003, 29.424121899999999],
 [-80.588664600000001, 28.034462100000006],
 [-115.13982959999998, 36.169941200000004],
 [-77.106769799999995, 38.879969700000004],
 [-110.926479, 32.221742900000002],
 [-122.48525069999999, 37.859093700000003],
 [-95.369802799999988, 29.7604267],
 [-74.948886000000002, 39.968881700000004],
 [-96.796987900000005, 32.776664199999999],
 [-81.694360500000002, 41.499320000000004],
 [-118.41090890000001, 33.884736100000005],
 [-122.0540996, 37.411269099999998],
 [-71.19562049999999, 42.504716100000003],
 [-75.627458300000001, 40.032581700000001],
 [-74.005941299999989, 40.712783700000003],
 [-122.4194155, 37.774929499999999],
 [-74.005941299999989, 40.712783700000003],
 [-66.105735499999994, 18.465539399999997],
 [-77.036870700000009, 38.907192299999998],
 [-118.41090890000001, 33.884736100000005],
 [-118.24368490000002, 34.052234200000001],
 [-87.964507699999999, 41.850030199999999],
 [-157.8583333, 21.306944399999999],
 [-134.4197222, 58.301944400000004],
 [-118.4911912, 34.019454299999992],
 [-117.6119925, 33.426972800000001],
 [-111.49797290000001, 40.646062200000003],
 [-87.906473599999998, 43.038902499999999],
 [-117.1610838, 32.715738000000002],
 [-77.036870700000009, 38.907192299999998],
 [-122.4194155, 37.774929499999999],
 [-122.4194155, 37.774929499999999],
 [-111.9260519, 33.494170399999994],
 [-77.036870700000009, 38.907192299999998],
 [-73.944157900000008, 40.6781784],
 [-157.8583333, 21.306944399999999],
 [-77.036870700000009, 38.907192299999998],
 [-74.005941299999989, 40.712783700000003],
 [-115.13982959999998, 36.169941200000004],
 [-117.6119925, 33.426972800000001],
 [-77.357002799999989, 38.958630700000001],
 [-121.9357918, 37.702152099999999],
 [-87.964507699999999, 41.850030199999999],
 [-73.944157900000008, 40.6781784],
 [-81.379236500000005, 28.538335499999999],
 [-121.9357918, 37.702152099999999],
 [-73.944157900000008, 40.6781784],
 [-111.49797290000001, 40.646062200000003],
 [-87.62979820000001, 41.878113599999999],
 [-104.990251, 39.739235799999996],
 [-115.13982959999998, 36.169941200000004],
 [-74.005941299999989, 40.712783700000003],
 [-95.369802799999988, 29.7604267],
 [-104.990251, 39.739235799999996],
 [-115.13982959999998, 36.169941200000004],
 [-81.385259500000004, 30.240005800000002],
 [-84.549932699999999, 33.952601999999999],
 [-117.14836480000001, 33.493639100000003],
 [-87.62979820000001, 41.878113599999999],
 [-81.794810299999995, 26.142035800000002],
 [-84.387982399999999, 33.748995399999998],
 [-104.990251, 39.739235799999996],
 [-117.8531119, 33.787794399999996],
 [-121.8946761, 36.600237799999995],
 [nan, nan],
 [-74.005941299999989, 40.712783700000003],
 [-118.34062879999999, 33.835849200000006],
 [-74.005941299999989, 40.712783700000003],
 [-118.4911912, 34.019454299999992],
 [-87.62979820000001, 41.878113599999999],
 [-157.8583333, 21.306944399999999],
 [-122.1817252, 37.452959800000002],
 [-74.005941299999989, 40.712783700000003],
 [-105.27054560000001, 40.014985600000003],
 [-74.005941299999989, 40.712783700000003],
 [-118.4911912, 34.019454299999992],
 [-115.13982959999998, 36.169941200000004],
 [-115.13982959999998, 36.169941200000004],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-115.13982959999998, 36.169941200000004],
 [-88.050340599999998, 41.746974899999998],
 [-74.005941299999989, 40.712783700000003],
 [-104.990251, 39.739235799999996],
 [-96.638883300000003, 32.912624000000001],
 [nan, nan],
 [-111.89104740000002, 40.760779299999996],
 [nan, nan],
 [-122.4194155, 37.774929499999999],
 [-122.4194155, 37.774929499999999],
 [-115.13982959999998, 36.169941200000004],
 [-111.89104740000002, 40.760779299999996],
 [-74.005941299999989, 40.712783700000003],
 [-122.14301950000001, 37.441883399999995],
 [-74.131809599999997, 40.940376200000003],
 [-74.005941299999989, 40.712783700000003],
 [-122.1817252, 37.452959800000002],
 [-122.4194155, 37.774929499999999],
 [-157.8583333, 21.306944399999999],
 [-115.13982959999998, 36.169941200000004],
 [-115.13982959999998, 36.169941200000004],
 [-95.369802799999988, 29.7604267],
 [-122.52747549999999, 37.925480600000007],
 [nan, nan],
 [-84.549932699999999, 33.952601999999999],
 [-118.4911912, 34.019454299999992],
 [-117.79469420000001, 33.6839473],
 [-105.27054560000001, 40.014985600000003],
 [-122.08079640000001, 37.668820500000002],
 [-74.005941299999989, 40.712783700000003],
 [-81.034814400000002, 34.000710400000003],
 [-81.034814400000002, 34.000710400000003],
 [-84.017690400000006, 33.667610299999993],
 [-74.005941299999989, 40.712783700000003],
 [-93.455787700000002, 45.072464200000006],
 [-122.4194155, 37.774929499999999],
 [-122.4194155, 37.774929499999999],
 [-78.825562099999999, 35.823483000000003],
 [-74.005941299999989, 40.712783700000003],
 [-95.369802799999988, 29.7604267],
 [-94.208817199999999, 36.372853799999994],
 [-88.122719900000007, 42.062991499999995],
 [-80.399774800000003, 26.100365399999998],
 [-73.714447700000008, 41.040013500000001],
 [-77.944710200000003, 34.225725500000003],
 [-122.4194155, 37.774929499999999],
 [-117.8531119, 33.787794399999996],
 [-87.62979820000001, 41.878113599999999],
 [-95.824395599999988, 29.785785300000001],
 [nan, nan],
 [-74.005941299999989, 40.712783700000003],
 [-111.929658, 40.562170399999999],
 [-71.058880099999996, 42.360082500000004],
 [-74.005941299999989, 40.712783700000003],
 [-95.369802799999988, 29.7604267],
 [-96.796987900000005, 32.776664199999999],
 [-95.369802799999988, 29.7604267],
 [-122.4194155, 37.774929499999999],
 [-88.207269699999998, 40.110587500000001],
 [-80.245604499999999, 25.942037699999997],
 [-73.357904900000008, 41.141471700000004],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-122.4194155, 37.774929499999999],
 [-77.306373300000004, 38.846223600000002],
 [-77.306373300000004, 38.846223600000002],
 [-122.4194155, 37.774929499999999],
 [-96.400306900000004, 42.499994200000003],
 [-104.98775970000001, 39.647765299999996],
 [-83.149775099999999, 42.606409499999998],
 [-74.005941299999989, 40.712783700000003],
 [nan, nan],
 [-77.036870700000009, 38.907192299999998],
 [-77.036870700000009, 38.907192299999998],
 [-71.348948400000012, 42.460371899999998],
 [-122.03634960000001, 37.368829999999996],
 [-83.367716799999997, 42.498993599999999],
 [-117.91450359999999, 33.835293200000002],
 [-122.67648159999999, 45.523062200000005],
 [-73.762909700000009, 41.033986200000001],
 [nan, nan],
 [-122.2020794, 47.978984799999999],
 [-80.1917902, 25.7616798],
 [-74.150200699999999, 40.579531700000004],
 [-73.864261299999995, 40.744985900000003],
 [-76.937759999999997, 38.989696700000003],
 [nan, nan],
 [-121.88632859999998, 37.338208200000004],
 [-84.387982399999999, 33.748995399999998],
 [-82.254283400000006, 34.737063899999995],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [nan, nan],
 [-95.369802799999988, 29.7604267],
 [-74.005941299999989, 40.712783700000003],
 [nan, nan],
 [-96.889963599999987, 32.975641499999995],
 [-122.4194155, 37.774929499999999],
 [-111.89104740000002, 40.760779299999996],
 [-87.62979820000001, 41.878113599999999],
 [-74.045140500000002, 40.841211100000002],
 [-86.781601599999988, 36.162663799999997],
 [-79.995886400000003, 40.440624799999995],
 [nan, nan],
 [-82.254283400000006, 34.737063899999995],
 [-74.005941299999989, 40.712783700000003],
 [-84.549932699999999, 33.952601999999999],
 [-95.369802799999988, 29.7604267],
 [-84.512019600000002, 39.103118200000004],
 [-118.40035630000001, 34.073620399999996],
 [-77.046921400000002, 38.804835499999996],
 [-77.046921400000002, 38.804835499999996],
 [nan, nan],
 [-118.3870173, 33.744461299999998],
 [-80.149490099999994, 26.011201400000001],
 [-95.616054900000009, 30.097162100000006],
 [-117.9289469, 33.618910100000001],
 [-74.059307499999989, 40.8598219],
 [-78.886694300000002, 33.689060299999994],
 [-95.369802799999988, 29.7604267],
 [-84.198579000000009, 34.028925899999997],
 [-122.12151200000001, 47.673988100000003],
 [-77.046921400000002, 38.804835499999996],
 [-122.34808999999998, 37.5778696],
 [-80.1917902, 25.7616798],
 [-74.005941299999989, 40.712783700000003],
 [-105.0749801, 40.397761200000005],
 [-71.19562049999999, 42.504716100000003],
 [-81.40757099999999, 28.291955699999995],
 [144.75022280000002, 13.476282399999999],
 [-122.28524729999999, 37.831315999999994],
 [-91.239580700000005, 43.801355600000001],
 [nan, nan],
 [-73.538734099999985, 41.053430200000001],
 [-89.58898640000001, 40.693648799999998],
 [-122.03218229999999, 37.322997799999996],
 [-84.294089900000003, 34.075376200000008],
 [-117.1610838, 32.715738000000002],
 [-74.549328400000007, 40.706617399999999],
 [-74.005941299999989, 40.712783700000003],
 [-71.058880099999996, 42.360082500000004],
 [nan, nan],
 [-76.612189299999997, 39.290384799999998],
 [-79.9414266, 37.270970399999996],
 [-122.4194155, 37.774929499999999],
 [-95.616054900000009, 30.097162100000006],
 [-74.005941299999989, 40.712783700000003],
 [-87.988955599999997, 41.931696000000002],
 [-96.796987900000005, 32.776664199999999],
 [-97.743060799999995, 30.267153000000004],
 [-84.140192599999992, 34.207319599999998],
 [-118.4694832, 33.9850469],
 [-74.005941299999989, 40.712783700000003],
 [nan, nan],
 [-122.4194155, 37.774929499999999],
 [-121.95523559999999, 37.354107899999995],
 [-122.4194155, 37.774929499999999],
 [-121.95523559999999, 37.354107899999995],
 [-112.00105009999999, 40.691613200000006],
 [-74.005941299999989, 40.712783700000003],
 [-87.988955599999997, 41.931696000000002],
 [-97.743060799999995, 30.267153000000004],
 [-74.005941299999989, 40.712783700000003],
 [-95.369802799999988, 29.7604267],
 [-105.27054560000001, 40.014985600000003],
 [-74.005941299999989, 40.712783700000003],
 [-122.27111370000002, 37.804363700000003],
 [-122.4194155, 37.774929499999999],
 [-95.369802799999988, 29.7604267],
 [-97.228902900000008, 32.8342952],
 [-74.005941299999989, 40.712783700000003],
 [-122.20558829999999, 47.760950000000001],
 [-121.8995741, 37.432334099999999],
 [-105.27054560000001, 40.014985600000003],
 [-96.889963599999987, 32.975641499999995],
 [-122.03218229999999, 37.322997799999996],
 [-122.03218229999999, 37.322997799999996],
 [-122.03218229999999, 37.322997799999996],
 [-80.1917902, 25.7616798],
 [-122.08385109999999, 37.386051700000003],
 [-79.995886400000003, 40.440624799999995],
 [-122.33207079999998, 47.606209499999999],
 [nan, nan],
 [-74.005941299999989, 40.712783700000003],
 [-96.948894499999994, 32.814017699999994],
 [-74.005941299999989, 40.712783700000003],
 [-74.417096999999998, 40.759822700000001],
 [-72.993296900000004, 40.885835299999997],
 [-95.369802799999988, 29.7604267],
 [-85.864940799999999, 37.703064599999998],
 [nan, nan],
 [-87.62979820000001, 41.878113599999999],
 [-96.698885599999997, 33.019843100000003],
 [-75.439593099999996, 40.045823999999996],
 [-74.005941299999989, 40.712783700000003],
 [nan, nan],
 [-95.369802799999988, 29.7604267],
 [-71.802293399999996, 42.262593200000005],
 [-80.1917902, 25.7616798],
 [-111.8507662, 40.391617199999999],
 [-96.639782199999999, 33.197246500000006],
 [-74.667222599999988, 40.357297600000003],
 [-85.864940799999999, 37.703064599999998],
 [-122.4194155, 37.774929499999999],
 [-122.4194155, 37.774929499999999],
 [-87.62979820000001, 41.878113599999999],
 [-71.209221400000004, 42.337041299999996],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-121.88632859999998, 37.338208200000004],
 [-122.33207079999998, 47.606209499999999],
 [-111.8507662, 40.391617199999999],
 [-111.8507662, 40.391617199999999],
 [-71.209221400000004, 42.337041299999996],
 [-121.88632859999998, 37.338208200000004],
 [-122.33207079999998, 47.606209499999999],
 [-83.221873099999996, 42.473368799999996],
 [-122.4194155, 37.774929499999999],
 [-121.88632859999998, 37.338208200000004],
 [-111.8507662, 40.391617199999999],
 [-157.8583333, 21.306944399999999],
 [nan, nan],
 [-74.005941299999989, 40.712783700000003],
 [-157.8583333, 21.306944399999999],
 [-74.005941299999989, 40.712783700000003],
 [-96.698885599999997, 33.019843100000003],
 [-74.005941299999989, 40.712783700000003],
 [-95.209100599999999, 29.691062500000001],
 [nan, nan],
 [nan, nan],
 [-118.072846, 34.080565100000001],
 [-118.072846, 34.080565100000001],
 [-75.165221500000001, 39.9525839],
 [-111.86382260000001, 40.524671099999999],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [nan, nan],
 [-122.1817252, 37.452959800000002],
 [-122.4194155, 37.774929499999999],
 [-106.48502169999999, 31.761877800000001],
 [-96.698885599999997, 33.019843100000003],
 [nan, nan],
 [nan, nan],
 [-87.828954799999991, 42.127526700000004],
 [-77.036870700000009, 38.907192299999998],
 [-117.8678338, 33.745573100000001],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-122.08079640000001, 37.668820500000002],
 [-87.62979820000001, 41.878113599999999],
 [-73.357904900000008, 41.141471700000004],
 [-117.1610838, 32.715738000000002],
 [-74.005941299999989, 40.712783700000003],
 [-122.03634960000001, 37.368829999999996],
 [nan, nan],
 [-96.698885599999997, 33.019843100000003],
 [-106.48502169999999, 31.761877800000001],
 [-96.698885599999997, 33.019843100000003],
 [-71.058880099999996, 42.360082500000004],
 [-118.7616764, 34.153339500000001],
 [-74.005941299999989, 40.712783700000003],
 [-87.62979820000001, 41.878113599999999],
 [-80.1917902, 25.7616798],
 [-114.62769159999999, 32.6926512],
 [nan, nan],
 [-117.72560829999999, 33.567684200000002],
 [nan, nan],
 [-72.588422200000011, 42.341756500000002],
 [-80.130045499999994, 25.790654],
 [-87.62979820000001, 41.878113599999999],
 [nan, nan],
 [-122.1817252, 37.452959800000002],
 [-71.058880099999996, 42.360082500000004],
 [-89.781174500000006, 30.275194500000001],
 [-112.07403729999999, 33.448377100000002],
 [-118.83759369999999, 34.170560899999998],
 [-94.208817199999999, 36.372853799999994],
 [-122.4194155, 37.774929499999999],
 [nan, nan],
 [-122.4194155, 37.774929499999999],
 [nan, nan],
 [nan, nan],
 [-77.036870700000009, 38.907192299999998],
 [-84.213530899999995, 33.941212700000001],
 [-95.301062400000006, 32.351260100000005],
 [-111.929658, 40.562170399999999],
 [-93.750178900000009, 32.525151600000001],
 [-89.781174500000006, 30.275194500000001],
 [-122.4194155, 37.774929499999999],
 [nan, nan],
 [-122.4194155, 37.774929499999999],
 [-95.369802799999988, 29.7604267],
 [-122.4194155, 37.774929499999999],
 [-122.4194155, 37.774929499999999],
 [-87.906473599999998, 43.038902499999999],
 [-122.32552539999999, 37.562991700000005],
 [-80.137317400000015, 26.122438600000002],
 [-118.1597929, 34.0005691],
 [-112.07403729999999, 33.448377100000002],
 [-87.906473599999998, 43.038902499999999],
 [-122.41108349999999, 37.630490399999999],
 [-97.330053000000007, 37.687176100000002],
 [-97.015007799999992, 32.954568699999996],
 [-87.62979820000001, 41.878113599999999],
 [-74.005941299999989, 40.712783700000003],
 [-73.780144700000008, 40.733517900000002],
 [-80.1917902, 25.7616798],
 [-118.24368490000002, 34.052234200000001],
 [-74.043473599999999, 40.885932500000003],
 [nan, nan],
 [-94.208817199999999, 36.372853799999994],
 [-74.005941299999989, 40.712783700000003],
 [-94.208817199999999, 36.372853799999994],
 [-73.923461900000007, 40.764357399999994],
 [-74.075418900000003, 40.944542799999994],
 [-74.005941299999989, 40.712783700000003],
 [-122.4194155, 37.774929499999999],
 [-73.538734099999985, 41.053430200000001],
 [-118.41338940000001, 34.053660799999996],
 [-95.369802799999988, 29.7604267],
 [-95.369802799999988, 29.7604267],
 [-71.10973349999999, 42.373615799999996],
 [-74.005941299999989, 40.712783700000003],
 [-122.32552539999999, 37.562991700000005],
 [nan, nan],
 [-104.990251, 39.739235799999996],
 [-96.236846499999999, 41.283195800000001],
 [-74.364612199999996, 40.714637599999996],
 [-88.060380600000002, 44.448880500000001],
 [-74.005941299999989, 40.712783700000003],
 [-75.316295099999991, 40.069832099999999],
 [-122.33207079999998, 47.606209499999999],
 [-117.93534129999999, 34.106952700000001],
 [-122.4194155, 37.774929499999999],
 [-111.83147240000001, 33.4151843],
 [-91.665623200000013, 41.9778795],
 [nan, nan],
 [-112.07403729999999, 33.448377100000002],
 [-84.213530899999995, 33.941212700000001],
 [-89.810085799999996, 35.086757700000007],
 [nan, nan],
 [nan, nan],
 [-88.089506099999994, 41.647530600000003],
 [-73.437898799999999, 41.1953739],
 [-74.005941299999989, 40.712783700000003],
 [-118.24368490000002, 34.052234200000001],
 [-75.513811799999999, 40.036218400000003],
 [-95.369802799999988, 29.7604267],
 [-80.397273599999991, 27.638643399999999],
 [-92.019842699999998, 30.2240897],
 [-74.005941299999989, 40.712783700000003],
 [-77.036870700000009, 38.907192299999998],
 [-118.3531311, 33.961680100000002],
 [nan, nan],
 [-117.35059390000001, 33.158093300000004],
 [-96.698885599999997, 33.019843100000003],
 [-96.796987900000005, 32.776664199999999],
 [-74.005941299999989, 40.712783700000003],
 [-80.1917902, 25.7616798],
 [-75.513811799999999, 40.036218400000003],
 [-80.752607999999995, 32.216315999999999],
 [-71.058880099999996, 42.360082500000004],
 [-122.4194155, 37.774929499999999],
 [nan, nan],
 [-80.1917902, 25.7616798],
 [-112.18598659999999, 33.538652299999995],
 [-74.005941299999989, 40.712783700000003],
 [-118.6089752, 34.165357],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-73.944157900000008, 40.6781784],
 [-106.60555340000001, 35.085333599999998],
 [-80.579510999999997, 35.408751700000003],
 [-121.87467890000001, 37.6624312],
 [nan, nan],
 [-118.4694832, 33.9850469],
 [-120.84659409999999, 37.494656799999994],
 [-77.036870700000009, 38.907192299999998],
 [-118.28169299999999, 33.831674499999998],
 [-77.036870700000009, 38.907192299999998],
 [-75.383552500000008, 40.101285600000004],
 [-122.03218229999999, 37.322997799999996],
 [-77.177260400000009, 38.933867600000006],
 [-118.3531311, 33.961680100000002],
 [-74.005941299999989, 40.712783700000003],
 [-122.4194155, 37.774929499999999],
 [-73.939568700000009, 42.8142432],
 [-121.98857190000001, 37.548269700000006],
 [-77.036870700000009, 38.907192299999998],
 [-94.208817199999999, 36.372853799999994],
 [-98.230012400000007, 26.203407100000003],
 [-74.005941299999989, 40.712783700000003],
 [-80.1917902, 25.7616798],
 [-74.005941299999989, 40.712783700000003],
 [-117.8678338, 33.745573100000001],
 [-74.005941299999989, 40.712783700000003],
 [-83.555211999999997, 41.663938299999998],
 [-121.8995741, 37.432334099999999],
 [-86.158068, 39.768402999999999],
 [-95.369802799999988, 29.7604267],
 [nan, nan],
 [-117.7325848, 33.989818799999995],
 [-73.997639000000007, 40.848155600000005],
 [-74.005941299999989, 40.712783700000003],
 [-112.07403729999999, 33.448377100000002],
 [-87.62979820000001, 41.878113599999999],
 [-74.005941299999989, 40.712783700000003],
 [-73.538734099999985, 41.053430200000001],
 [nan, nan],
 [-122.4194155, 37.774929499999999],
 [-87.787840799999998, 42.069750899999995],
 [nan, nan],
 [-97.5164276, 35.467560200000001],
 [-66.111068200000005, 18.3615548],
 [-122.29289740000002, 47.185378499999999],
 [-122.4194155, 37.774929499999999],
 [-87.62979820000001, 41.878113599999999],
 [-118.4164652, 33.919179900000003],
 [-122.4194155, 37.774929499999999],
 [-115.13982959999998, 36.169941200000004],
 [-74.005941299999989, 40.712783700000003],
 [-74.005941299999989, 40.712783700000003],
 [-72.571755100000004, 41.848987200000003],
 [-74.075418900000003, 40.944542799999994],
 [-72.949270299999995, 41.671764799999998],
 [-117.79469420000001, 33.6839473],
 [-121.88632859999998, 37.338208200000004],
 [-118.8073729, 34.146646699999998],
 [-74.790717999999998, 40.328440200000003],
 [-95.369802799999988, 29.7604267],
 [-117.68894399999999, 34.012234599999999],
 [-74.364724699999996, 40.820062299999996],
 [-122.27580079999998, 37.520214500000002],
 [-80.137317400000015, 26.122438600000002],
 [-93.265010799999999, 44.977753],
 [-80.137317400000015, 26.122438600000002],
 ...]

In [ ]:
numpy.random.seed(0)
seaborn.set()
uniform_data = lon
ax = seaborn.heatmap(uniform_data)


KeyboardInterrupt


In [10]:
uniform_data


Out[10]:
array([[ 0.5488135 ,  0.71518937,  0.60276338,  0.54488318,  0.4236548 ,
         0.64589411,  0.43758721,  0.891773  ,  0.96366276,  0.38344152,
         0.79172504,  0.52889492],
       [ 0.56804456,  0.92559664,  0.07103606,  0.0871293 ,  0.0202184 ,
         0.83261985,  0.77815675,  0.87001215,  0.97861834,  0.79915856,
         0.46147936,  0.78052918],
       [ 0.11827443,  0.63992102,  0.14335329,  0.94466892,  0.52184832,
         0.41466194,  0.26455561,  0.77423369,  0.45615033,  0.56843395,
         0.0187898 ,  0.6176355 ],
       [ 0.61209572,  0.616934  ,  0.94374808,  0.6818203 ,  0.3595079 ,
         0.43703195,  0.6976312 ,  0.06022547,  0.66676672,  0.67063787,
         0.21038256,  0.1289263 ],
       [ 0.31542835,  0.36371077,  0.57019677,  0.43860151,  0.98837384,
         0.10204481,  0.20887676,  0.16130952,  0.65310833,  0.2532916 ,
         0.46631077,  0.24442559],
       [ 0.15896958,  0.11037514,  0.65632959,  0.13818295,  0.19658236,
         0.36872517,  0.82099323,  0.09710128,  0.83794491,  0.09609841,
         0.97645947,  0.4686512 ],
       [ 0.97676109,  0.60484552,  0.73926358,  0.03918779,  0.28280696,
         0.12019656,  0.2961402 ,  0.11872772,  0.31798318,  0.41426299,
         0.0641475 ,  0.69247212],
       [ 0.56660145,  0.26538949,  0.52324805,  0.09394051,  0.5759465 ,
         0.9292962 ,  0.31856895,  0.66741038,  0.13179786,  0.7163272 ,
         0.28940609,  0.18319136],
       [ 0.58651293,  0.02010755,  0.82894003,  0.00469548,  0.67781654,
         0.27000797,  0.73519402,  0.96218855,  0.24875314,  0.57615733,
         0.59204193,  0.57225191],
       [ 0.22308163,  0.95274901,  0.44712538,  0.84640867,  0.69947928,
         0.29743695,  0.81379782,  0.39650574,  0.8811032 ,  0.58127287,
         0.88173536,  0.69253159]])